diff --git a/.github/required-checks.yml b/.github/required-checks.yml index 0180c6da623..e995dce4a4f 100644 --- a/.github/required-checks.yml +++ b/.github/required-checks.yml @@ -8,10 +8,10 @@ # An empty list (or missing key) for an event type disables enforcement # for that event — useful for bootstrapping. merge_group: &full_matrix -- Build and test (arm64, gcc11, openmpi) / Dev environment (Debug) -- Build and test (arm64, gcc11, openmpi) / Dev environment (Python) -- Build and test (arm64, gcc12, openmpi) / Dev environment (Debug) -- Build and test (arm64, gcc12, openmpi) / Dev environment (Python) +- Build and test (amd64, llvm, openmpi) / Dev environment (Debug) +- Build and test (amd64, llvm, openmpi) / Dev environment (Python) +- Build and test (arm64, llvm, openmpi) / Dev environment (Debug) +- Build and test (arm64, llvm, openmpi) / Dev environment (Python) - Create CUDA Quantum installer (amd64, 12.6) / Build CUDA Quantum assets - Create CUDA Quantum installer (amd64, 12.6) / Minimal OpenMPI installation - Create CUDA Quantum installer (amd64, 12.6) / Validate installer (debian:12) @@ -44,13 +44,40 @@ merge_group: &full_matrix - Create Python wheels (arm64, 3.11, 12.6) / Validate wheel (redhat/ubi8:8.10, --user) - Create Python wheels (arm64, 3.11, 12.6) / Validate wheel (redhat/ubi8:8.10) push: -- Build and test (amd64, clang16, openmpi) / Dev environment (Debug) -- Build and test (amd64, clang16, openmpi) / Dev environment (Python) -- Build and test (amd64, gcc11, openmpi) / Dev environment (Debug) -- Build and test (amd64, gcc11, openmpi) / Dev environment (Python) -- Build and test (amd64, gcc12, openmpi) / Dev environment (Debug) -- Build and test (amd64, gcc12, openmpi) / Dev environment (Python) -- Build and test (arm64, clang16, openmpi) / Dev environment (Debug) -- Build and test (arm64, clang16, openmpi) / Dev environment (Python) +- Build and test (amd64, llvm, openmpi) / Dev environment (Debug) +- Build and test (amd64, llvm, openmpi) / Dev 
environment (Python) +- Build and test (arm64, llvm, openmpi) / Dev environment (Debug) +- Build and test (arm64, llvm, openmpi) / Dev environment (Python) +- Create CUDA Quantum installer (amd64, 12.6) / Build CUDA Quantum assets +- Create CUDA Quantum installer (amd64, 12.6) / Minimal OpenMPI installation +- Create CUDA Quantum installer (amd64, 12.6) / Validate installer (debian:12) +- Create CUDA Quantum installer (amd64, 12.6) / Validate installer (fedora:42) +- Create CUDA Quantum installer (amd64, 12.6) / Validate installer (opensuse/leap:15.5) +- Create CUDA Quantum installer (amd64, 12.6) / Validate installer (redhat/ubi9:9.6) +- Create CUDA Quantum installer (amd64, 12.6) / Validate installer (ubuntu:22.04) +- Create CUDA Quantum installer (arm64, 12.6) / Build CUDA Quantum assets +- Create CUDA Quantum installer (arm64, 12.6) / Minimal OpenMPI installation +- Create CUDA Quantum installer (arm64, 12.6) / Validate installer (redhat/ubi9:9.6) +- Create CUDA Quantum installer (arm64, 12.6) / Validate installer (ubuntu:22.04) +- Create Docker images (amd64) / Documentation +- Create Docker images (amd64) / Validation +- Create Docker images (arm64) / Validation +- Create Python metapackages / Build Python metapackages +- Create Python metapackages / Test Python metapackages (3.11) +- Create Python metapackages / Test Python metapackages (3.13) +- Create Python metapackages / Test Python metapackages (12.6, 3.11) +- Create Python metapackages / Test Python metapackages (12.6, 3.13) +- Create Python wheels (amd64, 3.11, 12.6) / Validate wheel (debian:12, --user) +- Create Python wheels (amd64, 3.11, 12.6) / Validate wheel (debian:12) +- Create Python wheels (amd64, 3.11, 12.6) / Validate wheel (fedora:42, --user) +- Create Python wheels (amd64, 3.11, 12.6) / Validate wheel (fedora:42) +- Create Python wheels (amd64, 3.11, 12.6) / Validate wheel (redhat/ubi8:8.10, --user) +- Create Python wheels (amd64, 3.11, 12.6) / Validate wheel (redhat/ubi8:8.10) +- Create 
Python wheels (arm64, 3.11, 12.6) / Validate wheel (debian:12, --user) +- Create Python wheels (arm64, 3.11, 12.6) / Validate wheel (debian:12) +- Create Python wheels (arm64, 3.11, 12.6) / Validate wheel (fedora:42, --user) +- Create Python wheels (arm64, 3.11, 12.6) / Validate wheel (fedora:42) +- Create Python wheels (arm64, 3.11, 12.6) / Validate wheel (redhat/ubi8:8.10, --user) +- Create Python wheels (arm64, 3.11, 12.6) / Validate wheel (redhat/ubi8:8.10) workflow_dispatch: *full_matrix schedule: *full_matrix diff --git a/.github/workflows/build_package_sources.yml b/.github/workflows/build_package_sources.yml index caccffabb3e..f2112652b20 100644 --- a/.github/workflows/build_package_sources.yml +++ b/.github/workflows/build_package_sources.yml @@ -67,7 +67,7 @@ jobs: else # cudaqx: devcontainer base, cudaqx target cu_tag=$(echo "${{ matrix.cuda }}" | tr -d .) - echo "base_image=ghcr.io/nvidia/cuda-quantum-devcontainer:amd64-cu${{ matrix.cuda }}-gcc11-main" | tee -a $GITHUB_OUTPUT + echo "base_image=ghcr.io/nvidia/cuda-quantum-devcontainer:amd64-cu${{ matrix.cuda }}-llvm-main" | tee -a $GITHUB_OUTPUT echo "target_image=ghcr.io/nvidia/cudaqx:cu${cuda_major}-latest" | tee -a $GITHUB_OUTPUT fi diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index de5055fdbef..9084a6b7194 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,6 +42,7 @@ jobs: cache_base: ${{ steps.pr_info.outputs.pr_base }} llvm_commit: ${{ steps.repo_info.outputs.llvm_commit }} pybind11_commit: ${{ steps.repo_info.outputs.pybind11_commit }} + nanobind_commit: ${{ steps.repo_info.outputs.nanobind_commit }} platform_config: ${{ steps.config.outputs.platforms }} build_test_matrix: ${{ steps.config.outputs.build_test_matrix }} @@ -65,19 +66,13 @@ jobs: # (expensive, redundant with amd64/gcc). merge_group/dispatch run all. 
if [ "${{ github.event_name }}" = "push" ]; then build_test_matrix='{"include":[ - {"platform":"amd64","toolchain":"clang16","mpi":"openmpi"}, - {"platform":"amd64","toolchain":"gcc11","mpi":"openmpi"}, - {"platform":"amd64","toolchain":"gcc12","mpi":"openmpi"}, - {"platform":"arm64","toolchain":"clang16","mpi":"openmpi"} + {"platform":"amd64","toolchain":"llvm","mpi":"openmpi"}, + {"platform":"arm64","toolchain":"llvm","mpi":"openmpi"} ]}' else build_test_matrix='{"include":[ - {"platform":"amd64","toolchain":"clang16","mpi":"openmpi"}, - {"platform":"amd64","toolchain":"gcc11","mpi":"openmpi"}, - {"platform":"amd64","toolchain":"gcc12","mpi":"openmpi"}, - {"platform":"arm64","toolchain":"clang16","mpi":"openmpi"}, - {"platform":"arm64","toolchain":"gcc11","mpi":"openmpi"}, - {"platform":"arm64","toolchain":"gcc12","mpi":"openmpi"} + {"platform":"amd64","toolchain":"llvm","mpi":"openmpi"}, + {"platform":"arm64","toolchain":"llvm","mpi":"openmpi"} ]}' fi echo "build_test_matrix=$(echo "$build_test_matrix" | jq -c .)" >> $GITHUB_OUTPUT @@ -114,6 +109,7 @@ jobs: run: | echo "llvm_commit=$(git rev-parse @:./tpls/llvm)" >> $GITHUB_OUTPUT echo "pybind11_commit=$(git rev-parse @:./tpls/pybind11)" >> $GITHUB_OUTPUT + echo "nanobind_commit=$(git rev-parse @:./tpls/nanobind)" >> $GITHUB_OUTPUT devdeps: name: Load dependencies @@ -121,7 +117,6 @@ jobs: strategy: matrix: platform: [amd64, arm64] - toolchain: [clang16, gcc11, gcc12] fail-fast: false uses: ./.github/workflows/dev_environment.yml secrets: @@ -130,14 +125,12 @@ jobs: with: platforms: linux/${{ matrix.platform }} dockerfile: build/devdeps.Dockerfile - build_config_id: ${{ matrix.toolchain }} - build_args: | - toolchain=${{ matrix.toolchain }} + build_config_id: llvm registry_cache_from: ${{ inputs.cache_base || needs.metadata.outputs.cache_base }} checkout_submodules: true environment: ghcr-ci # needed only for the cloudposse GitHub action - matrix_key: ${{ matrix.platform }}-${{ matrix.toolchain }} + matrix_key: 
${{ matrix.platform }}-llvm matrix_step_name: dev_environment_devdeps wheeldeps: @@ -155,14 +148,15 @@ jobs: with: platforms: linux/${{ matrix.platform }} dockerfile: build/devdeps.manylinux.Dockerfile - build_config_id: cu${{ matrix.cuda_version }}-gcc11 + build_config_id: cu${{ matrix.cuda_version }}-gcc12 build_args: | base_image=ghcr.io/nvidia/pypa/manylinux_2_28${{ (matrix.platform == 'arm64' && '_aarch64') || (matrix.platform == 'amd64' && '_x86_64') || '' }}:latest cuda_version=${{ matrix.cuda_version }} - toolchain=gcc11 + toolchain=gcc12 distro=rhel8 llvm_commit=${{ needs.metadata.outputs.llvm_commit }} pybind11_commit=${{ needs.metadata.outputs.pybind11_commit }} + nanobind_commit=${{ needs.metadata.outputs.nanobind_commit }} registry_cache_from: ${{ inputs.cache_base || needs.metadata.outputs.cache_base }} environment: ghcr-ci # needed only for the cloudposse GitHub action @@ -259,16 +253,15 @@ jobs: strategy: matrix: platform: [amd64] - toolchain: [clang16] fail-fast: false uses: ./.github/workflows/generate_cc.yml secrets: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: platform: linux/${{ matrix.platform }} - devdeps_image: ${{ fromJson(needs.config_devdeps.outputs.json).image_hash[format('{0}-{1}', matrix.platform, matrix.toolchain)] }} - devdeps_cache: ${{ fromJson(needs.config_devdeps.outputs.json).cache_key[format('{0}-{1}', matrix.platform, matrix.toolchain)] }} - devdeps_archive: ${{ fromJson(needs.config_devdeps.outputs.json).tar_archive[format('{0}-{1}', matrix.platform, matrix.toolchain)] }} + devdeps_image: ${{ fromJson(needs.config_devdeps.outputs.json).image_hash[format('{0}-llvm', matrix.platform)] }} + devdeps_cache: ${{ fromJson(needs.config_devdeps.outputs.json).cache_key[format('{0}-llvm', matrix.platform)] }} + devdeps_archive: ${{ fromJson(needs.config_devdeps.outputs.json).tar_archive[format('{0}-llvm', matrix.platform)] }} export_environment: ${{ github.event_name == 'workflow_dispatch' && inputs.export_environment }} # Docker 
images are packaging, not correctness — only built on merge_group/dispatch. @@ -286,9 +279,9 @@ jobs: DOCKERHUB_READONLY_TOKEN: ${{ secrets.DOCKERHUB_READONLY_TOKEN }} with: platforms: linux/${{ matrix.platform }} - devdeps_image: ${{ fromJson(needs.config_devdeps.outputs.json).image_hash[format('{0}-gcc11', matrix.platform)] }} - devdeps_cache: ${{ fromJson(needs.config_devdeps.outputs.json).cache_key[format('{0}-gcc11', matrix.platform)] }} - devdeps_archive: ${{ fromJson(needs.config_devdeps.outputs.json).tar_archive[format('{0}-gcc11', matrix.platform)] }} + devdeps_image: ${{ fromJson(needs.config_devdeps.outputs.json).image_hash[format('{0}-llvm', matrix.platform)] }} + devdeps_cache: ${{ fromJson(needs.config_devdeps.outputs.json).cache_key[format('{0}-llvm', matrix.platform)] }} + devdeps_archive: ${{ fromJson(needs.config_devdeps.outputs.json).tar_archive[format('{0}-llvm', matrix.platform)] }} environment: ghcr-ci python_wheels: diff --git a/.github/workflows/clean_caches.yml b/.github/workflows/clean_caches.yml index 210fb0c68b7..083a035abf6 100644 --- a/.github/workflows/clean_caches.yml +++ b/.github/workflows/clean_caches.yml @@ -125,14 +125,14 @@ jobs: - name: Delete build caches for MPI asset (ARM64) uses: actions/delete-package-versions@v5 with: - package-name: buildcache-cuda-quantum-assets-openmpi-gcc11-arm64 + package-name: buildcache-cuda-quantum-assets-openmpi-llvm-arm64 package-type: 'container' min-versions-to-keep: 1 # the used action does not support 0 here - name: Delete build caches for MPI asset (AMD64) uses: actions/delete-package-versions@v5 with: - package-name: buildcache-cuda-quantum-assets-openmpi-gcc11-amd64 + package-name: buildcache-cuda-quantum-assets-openmpi-llvm-amd64 package-type: 'container' min-versions-to-keep: 1 # the used action does not support 0 here diff --git a/.github/workflows/create_cache_command.yml b/.github/workflows/create_cache_command.yml index 06e0ae6aeee..c19d059e11e 100644 --- 
a/.github/workflows/create_cache_command.yml +++ b/.github/workflows/create_cache_command.yml @@ -56,6 +56,7 @@ jobs: outputs: llvm_commit: ${{ steps.repo_info.outputs.llvm_commit }} pybind11_commit: ${{ steps.repo_info.outputs.pybind11_commit }} + nanobind_commit: ${{ steps.repo_info.outputs.nanobind_commit }} platform_config: ${{ steps.config.outputs.platforms }} steps: @@ -80,6 +81,7 @@ jobs: run: | echo "llvm_commit=$(git rev-parse @:./tpls/llvm)" >> $GITHUB_OUTPUT echo "pybind11_commit=$(git rev-parse @:./tpls/pybind11)" >> $GITHUB_OUTPUT + echo "nanobind_commit=$(git rev-parse @:./tpls/nanobind)" >> $GITHUB_OUTPUT devdeps_caches: name: Cache dev dependencies @@ -87,7 +89,6 @@ jobs: strategy: matrix: platform: [amd64, arm64] - toolchain: [clang16, gcc11, gcc12] fail-fast: false uses: ./.github/workflows/dev_environment.yml secrets: @@ -96,15 +97,13 @@ jobs: with: platforms: linux/${{ matrix.platform }} dockerfile: build/devdeps.Dockerfile - build_config_id: ${{ matrix.toolchain }} - build_args: | - toolchain=${{ matrix.toolchain }} + build_config_id: llvm create_local_cache: true registry_cache_from: ${{ needs.pr_info.outputs.target_branch }} pull_request_number: ${{ needs.pr_info.outputs.pull_request_number }} checkout_submodules: true # needed only for the cloudposse GitHub action - matrix_key: ${{ matrix.platform }}-${{ matrix.toolchain }} + matrix_key: ${{ matrix.platform }}-llvm wheeldeps_caches: name: Cache wheel dependencies @@ -121,14 +120,15 @@ jobs: with: platforms: linux/${{ matrix.platform }} dockerfile: build/devdeps.manylinux.Dockerfile - build_config_id: cu${{ matrix.cuda_version }}-gcc11 + build_config_id: cu${{ matrix.cuda_version }}-gcc12 build_args: | base_image=ghcr.io/nvidia/pypa/manylinux_2_28${{ (matrix.platform == 'arm64' && '_aarch64') || (matrix.platform == 'amd64' && '_x86_64') || '' }}:latest cuda_version=${{ matrix.cuda_version }} - toolchain=gcc11 + toolchain=gcc12 distro=rhel8 llvm_commit=${{ needs.metadata.outputs.llvm_commit }} 
pybind11_commit=${{ needs.metadata.outputs.pybind11_commit }} + nanobind_commit=${{ needs.metadata.outputs.nanobind_commit }} create_local_cache: true registry_cache_from: ${{ needs.pr_info.outputs.target_branch }} pull_request_number: ${{ needs.pr_info.outputs.pull_request_number }} diff --git a/.github/workflows/deployments.yml b/.github/workflows/deployments.yml index 54b4ccad6b1..25ff93aeefe 100644 --- a/.github/workflows/deployments.yml +++ b/.github/workflows/deployments.yml @@ -77,6 +77,7 @@ jobs: pull_request_commit: ${{ steps.pr_info.outputs.merge_commit }} llvm_commit: ${{ steps.build_config.outputs.llvm_commit }} pybind11_commit: ${{ steps.build_config.outputs.pybind11_commit }} + nanobind_commit: ${{ steps.build_config.outputs.nanobind_commit }} cache_base: ${{ steps.build_info.outputs.cache_base }} cache_target: ${{ steps.build_info.outputs.cache_target }} multi_platform: ${{ steps.build_info.outputs.multi_platform }} @@ -188,6 +189,7 @@ jobs: run: | echo "llvm_commit=$(git rev-parse @:./tpls/llvm)" >> $GITHUB_OUTPUT echo "pybind11_commit=$(git rev-parse @:./tpls/pybind11)" >> $GITHUB_OUTPUT + echo "nanobind_commit=$(git rev-parse @:./tpls/nanobind)" >> $GITHUB_OUTPUT if ${{ github.event_name != 'workflow_run' || steps.pr_info.outputs.pr_number != '' }}; then echo "build_dependencies=true" >> $GITHUB_OUTPUT @@ -203,7 +205,7 @@ jobs: strategy: matrix: platform: ${{ fromJson(needs.metadata.outputs.platforms).ids }} - toolchain: [clang16, gcc11, gcc12] + toolchain: [llvm] fail-fast: false uses: ./.github/workflows/dev_environment.yml secrets: @@ -242,14 +244,15 @@ jobs: with: platforms: ${{ fromJson(needs.metadata.outputs.platforms)[format('{0}', matrix.platform)].docker_flag }} dockerfile: build/devdeps.manylinux.Dockerfile - build_config_id: cu${{ matrix.cuda_version }}-gcc11 + build_config_id: cu${{ matrix.cuda_version }}-gcc12 build_args: | base_image=ghcr.io/nvidia/pypa/manylinux_2_28${{ (matrix.platform == 'arm64' && '_aarch64') || (matrix.platform 
== 'amd64' && '_x86_64') || '' }}:latest cuda_version=${{ matrix.cuda_version }} - toolchain=gcc11 + toolchain=gcc12 distro=rhel8 llvm_commit=${{ needs.metadata.outputs.llvm_commit }} pybind11_commit=${{ needs.metadata.outputs.pybind11_commit }} + nanobind_commit=${{ needs.metadata.outputs.nanobind_commit }} registry_cache_from: ${{ needs.metadata.outputs.cache_base }} update_registry_cache: ${{ needs.metadata.outputs.cache_target }} pull_request_number: ${{ needs.metadata.outputs.pull_request_number }} @@ -368,7 +371,7 @@ jobs: strategy: matrix: platform: [amd64] - toolchain: [clang16] + toolchain: [llvm] fail-fast: false uses: ./.github/workflows/generate_cc.yml secrets: @@ -393,10 +396,10 @@ jobs: with: platforms: ${{ fromJson(needs.metadata.outputs.platforms)[format('{0}', matrix.platform)].docker_flag }} dockerfile: build/devcontainer.Dockerfile - build_config_id: cu${{ matrix.cuda_version }}-gcc11 + build_config_id: cu${{ matrix.cuda_version }}-llvm build_args: | cuda_version=${{ matrix.cuda_version }} - base_image=${{ fromJson(needs.config.outputs.json).image_hash[format('{0}-gcc11', matrix.platform)] }} + base_image=${{ fromJson(needs.config.outputs.json).image_hash[format('{0}-llvm', matrix.platform)] }} ompidev_image=${{ fromJson(needs.config.outputs.json).image_hash[format('{0}-cu{1}-ompi', matrix.platform, matrix.cuda_version)] }} registry_cache_from: ${{ needs.metadata.outputs.cache_base }} update_registry_cache: ${{ needs.metadata.outputs.cache_target }} diff --git a/.github/workflows/dev_environment_macos.yml b/.github/workflows/dev_environment_macos.yml index 579f9ea0073..9947f016430 100644 --- a/.github/workflows/dev_environment_macos.yml +++ b/.github/workflows/dev_environment_macos.yml @@ -95,6 +95,7 @@ jobs: scripts/build_llvm.sh \ scripts/set_env_defaults.sh \ .github/workflows/dev_environment_macos.yml \ + tpls/customizations/llvm/*.diff \ | sha256sum | cut -c1-8) echo "scripts_hash=$scripts_hash" >> $GITHUB_OUTPUT @@ -181,6 +182,9 @@ jobs: # 
cache already has MLIR_ENABLE_BINDINGS_PYTHON=ON. Downstream # wheel jobs only change Python3_EXECUTABLE, which keeps ninja's # incremental rebuild scoped to the binding targets. + # Initialize the nanobind submodule, which is needed for MLIR Python bindings + git submodule update --init --recursive tpls/nanobind + source scripts/set_env_defaults.sh export LLVM_PROJECTS='clang;lld;mlir;openmp;python-bindings' diff --git a/.github/workflows/test_in_devenv.yml b/.github/workflows/test_in_devenv.yml index 6aa6b771e11..6015fe11d30 100644 --- a/.github/workflows/test_in_devenv.yml +++ b/.github/workflows/test_in_devenv.yml @@ -297,6 +297,7 @@ jobs: echo "::error file=test_in_devenv.yml:: Pip install of CUDA Quantum failed with status $pyinstall_status." exit 1 fi + python -m pip install pytest pytest-xdist python -m pytest -v --durations=0 -n auto python/tests/ \ --ignore python/tests/backends \ diff --git a/.gitmodules b/.gitmodules index 644ab8cc24f..5cf32c5ccd4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -51,3 +51,4 @@ [submodule "tpls/nanobind"] path = tpls/nanobind url = https://github.com/wjakob/nanobind.git + ignore = dirty diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 54e8edface0..d07196d066e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,7 +53,6 @@ repos: - id: markdownlint name: Markdown linting files: '\.md$' - exclude: '^tpls/' args: ['--config', '.github/pre-commit/md_lint_config.yml'] # Standard quality checks diff --git a/Building.md b/Building.md index e0f00751f40..d9a86b5ebfb 100644 --- a/Building.md +++ b/Building.md @@ -69,7 +69,10 @@ CUDA-Q can be built on macOS for development purposes. 
Note that: - **ARM64 only**: Only Apple silicon Macs are supported; Intel Macs are not supported - **CPU-only**: No CUDA/GPU support is available on macOS -- **Apple Clang**: Uses the system compiler (no need to install GCC or LLVM separately) +- **LLVM 22.1 toolchain**: CUDA-Q is compiled with the `Clang/LLD/libomp` built + by `scripts/build_llvm.sh` — the same toolchain used on Linux. Xcode Command + Line Tools are still required for the macOS `SDK/sysroot` (headers, frameworks, + `xcrun`), but Apple Clang is no longer used to compile CUDA-Q itself. - **Prerequisites required**: You must use `-p` to install LLVM and other dependencies Before building, complete the macOS setup steps in diff --git a/CMakeLists.txt b/CMakeLists.txt index ddcc6a9ea22..a4670866380 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ endif() # CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()` # New in CMake 3.20. https://cmake.org/cmake/help/latest/policy/CMP0116.html if(POLICY CMP0116) - cmake_policy(SET CMP0116 OLD) + cmake_policy(SET CMP0116 NEW) endif() # Project setup @@ -87,11 +87,14 @@ endif() # Enable the remote simulator by default. if (CUDAQ_ENABLE_REST AND NOT DEFINED CUDAQ_ENABLE_REMOTE_SIM) set(CUDAQ_ENABLE_REMOTE_SIM ON CACHE BOOL "Enable building cudaq-qpud.") - # Optionally enable the tests that use cudaq-qpud. if (NOT DEFINED CUDAQ_TEST_REMOTE_SIM) set(CUDAQ_TEST_REMOTE_SIM ON CACHE BOOL "Run remote-sim tests.") endif() endif() +if (NOT CUDAQ_ENABLE_REST) + set(CUDAQ_ENABLE_REMOTE_SIM OFF CACHE BOOL "Enable building cudaq-qpud." FORCE) + set(CUDAQ_TEST_REMOTE_SIM OFF CACHE BOOL "Run remote-sim tests." FORCE) +endif() # Enable Amazon Braket backends by default. 
if (NOT DEFINED CUDAQ_ENABLE_BRAKET_BACKEND) @@ -140,7 +143,10 @@ endif() set(CMAKE_EXPORT_COMPILE_COMMANDS 1) if(NOT LLVM_VERSION_MAJOR) - set(LLVM_VERSION_MAJOR 16) + set(LLVM_VERSION_MAJOR 22) +endif() +if(NOT LLVM_VERSION_MINOR) + set(LLVM_VERSION_MINOR 1) endif() find_package(Git QUIET) @@ -182,6 +188,27 @@ if (${CUDAQ_FORCE_COLORED_OUTPUT}) endif () endif () +add_compile_options(-Wno-error=deprecated-declarations) +# Use plain -Wno- (not -Wno-error=) so GCC silently ignores it when unsupported. +# Guard with check_cxx_compiler_flag so Apple Clang (which errors on unknown +# -Wno- options) doesn't see it if the warning doesn't exist there. +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag(-Wno-uninitialized-const-pointer + CUDAQ_HAS_WNO_UNINITIALIZED_CONST_POINTER) +if (CUDAQ_HAS_WNO_UNINITIALIZED_CONST_POINTER) + add_compile_options(-Wno-uninitialized-const-pointer) +endif() +# GCC 12 headers emit false-positive diagnostics (e.g. char_traits.h, +# stl_algobase.h) when compiled with Clang using GCC's sysroot. +check_cxx_compiler_flag(-Wno-restrict CUDAQ_HAS_WNO_RESTRICT) +if (CUDAQ_HAS_WNO_RESTRICT) + add_compile_options(-Wno-restrict) +endif() +# -Wstringop-overflow is GCC-only; check_cxx_compiler_flag caching is unreliable here. +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options(-Wno-stringop-overflow) +endif() + # Certain build configurations may be set directly in the environment. # This facilitates some of the packaging (e.g. python packages built based on the pyproject.toml). # These are cached so they persist across cmake runs without needing the env vars set again. 
@@ -229,9 +256,9 @@ SET(CMAKE_SKIP_INSTALL_RPATH FALSE) SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) if(APPLE) - SET(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/lib;@loader_path/lib/plugins;@loader_path/../lib;@loader_path/../lib/plugins;@executable_path;@executable_path/lib;@executable_path/lib/plugins;@executable_path/../lib;@executable_path/../lib/plugins") + SET(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/lib;@loader_path/lib/plugins;@loader_path/../lib;@loader_path/../lib/plugins;@loader_path/../cudaq/mlir/_mlir_libs;@loader_path/../python/cudaq/mlir/_mlir_libs;@executable_path;@executable_path/lib;@executable_path/lib/plugins;@executable_path/../lib;@executable_path/../lib/plugins;@executable_path/../cudaq/mlir/_mlir_libs;@executable_path/../python/cudaq/mlir/_mlir_libs") else() - SET(CMAKE_INSTALL_RPATH "$ORIGIN:$ORIGIN/lib:$ORIGIN/lib/plugins:$ORIGIN/../lib:$ORIGIN/../lib/plugins") + SET(CMAKE_INSTALL_RPATH "$ORIGIN:$ORIGIN/lib:$ORIGIN/lib/plugins:$ORIGIN/../lib:$ORIGIN/../lib/plugins:$ORIGIN/../cudaq/mlir/_mlir_libs:$ORIGIN/../python/cudaq/mlir/_mlir_libs") endif() SET(BLA_STATIC ON) @@ -262,6 +289,7 @@ else() # B. Using LLVM/MLIR dylibs. This won't work until later versions of LLVM are used as # the first versions of this setting did not appropriately link all libraries to the dylibs. add_link_options("-Wl,-flat_namespace") + add_link_options("-Wl,-undefined,dynamic_lookup") endif() # Detect sysroot for C++ stdlib headers/libs. Critical on macOS where a custom-built @@ -325,10 +353,10 @@ endif() # using `LLVM_VERSION_MAJOR`, e.g. "-LLVM_VERSION_MAJOR=16". Note that this # version variable is set to the latest LLVM version by default, and setting it # to an older version might break the project. 
-find_package(LLVM ${LLVM_VERSION_MAJOR} CONFIG QUIET) +find_package(LLVM ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} CONFIG QUIET) if(NOT LLVM_DIR) - message(STATUS "LLVM_DIR not found, will try with llvm-config executable.") + message(STATUS "LLVM_DIR not found for ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}, will try with llvm-config executable.") macro(find_llvm_config name version_major) set(extra_args ${ARGN}) @@ -386,7 +414,7 @@ if(NOT LLVM_DIR) "Could not find suitable llvm-config(-${LLVM_VERSION_MAJOR}).\ \nTry providing valid -DLLVM_DIR=/path/to/llvm/lib/cmake/llvm.") else() - find_package(LLVM ${LLVM_VERSION_MAJOR} REQUIRED CONFIG + find_package(LLVM ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} REQUIRED CONFIG HINTS ${LLVM_CONFIG_CMAKE_DIR} NO_DEFAULT_PATH) endif() endif() @@ -682,6 +710,7 @@ endif() if (CUDAQ_ENABLE_PYTHON) find_package(Python 3 COMPONENTS Interpreter Development) + find_package(Python3 COMPONENTS Interpreter Development) # Apply specific patch to pybind11 for our documentation. # Only apply the patch if not already applied. 
@@ -729,6 +758,12 @@ if(CUDAQ_BUILD_TESTS AND NOT CUDAQ_DISABLE_CPP_FRONTEND) umbrella_lit_testsuite_begin(check-all) set(INSTALL_GTEST OFF) add_subdirectory(tpls/googletest-src) + # Turn off character-conversion warning in gtest for clang compilers + include(CheckCXXCompilerFlag) + check_cxx_compiler_flag(-Wno-character-conversion CUDAQ_HAS_WNO_CHARACTER_CONVERSION) + if (CUDAQ_HAS_WNO_CHARACTER_CONVERSION) + target_compile_options(gtest PUBLIC -Wno-character-conversion) + endif() # Bug in GCC 12 leads to spurious warnings (-Wrestrict) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105329 if (CMAKE_COMPILER_IS_GNUCXX diff --git a/cmake/caches/LLVM.cmake b/cmake/caches/LLVM.cmake index efce183a8dc..6b546d36a74 100644 --- a/cmake/caches/LLVM.cmake +++ b/cmake/caches/LLVM.cmake @@ -20,15 +20,17 @@ set(LLVM_ENABLE_ZSTD OFF CACHE BOOL "") set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "") set(LLVM_BUILD_TESTS OFF CACHE BOOL "") +set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "") set(LLVM_BUILD_EXAMPLES OFF CACHE BOOL "") set(LLVM_ENABLE_OCAMLDOC OFF CACHE BOOL "") if(DEFINED LLVM_ENABLE_RUNTIMES AND LLVM_ENABLE_RUNTIMES MATCHES "libcxx") message(STATUS "Setting defaults to use LLVM runtimes.") - # If we want to build dynamic libraries for the unwinder, - # we need to build support for exception handling. - set(LLVM_ENABLE_EH ON CACHE BOOL "") + # The runtimes (libcxx, libcxxabi, libunwind) control exception support + # independently via LIBCXX_ENABLE_EXCEPTIONS and LIBCXXABI_ENABLE_EXCEPTIONS. + # LLVM_ENABLE_EH must remain OFF when Flang is a project, since Flang + # rejects LLVM_ENABLE_EH=ON with a FATAL_ERROR. 
set(LLVM_ENABLE_RTTI ON CACHE BOOL "") set(LIBCXX_ENABLE_EXCEPTIONS ON CACHE BOOL "") set(LIBCXXABI_ENABLE_EXCEPTIONS ON CACHE BOOL "") diff --git a/cmake/modules/BuildHelpers.cmake b/cmake/modules/BuildHelpers.cmake index e52d9347a16..7ee5b77d6d0 100644 --- a/cmake/modules/BuildHelpers.cmake +++ b/cmake/modules/BuildHelpers.cmake @@ -8,29 +8,42 @@ include_guard() +function(_cudaq_check_openmp_usable RESULT_VAR) + find_package(OpenMP) + if(NOT OpenMP_CXX_FOUND) + set(${RESULT_VAR} FALSE PARENT_SCOPE) + return() + endif() + include(CheckCXXCompilerFlag) + set(CMAKE_REQUIRED_FLAGS "${OpenMP_CXX_FLAGS}") + check_cxx_compiler_flag("${OpenMP_CXX_FLAGS}" CUDAQ_HAS_OPENMP_FLAG) + unset(CMAKE_REQUIRED_FLAGS) + set(${RESULT_VAR} ${CUDAQ_HAS_OPENMP_FLAG} PARENT_SCOPE) +endfunction() + # If OpenMP is enabled and found, adds the necessary compile definitions to the # given target, and the necessary dependencies to the given list of dependencies. function(add_openmp_configurations TARGET_NAME DEPENDENCIES) - find_package(OpenMP) - if(OpenMP_CXX_FOUND) + _cudaq_check_openmp_usable(_openmp_usable) + if(_openmp_usable) message(STATUS "OpenMP Found. Adding build flags to target ${TARGET_NAME}: ${OpenMP_CXX_FLAGS}.") list(APPEND ${DEPENDENCIES} OpenMP::OpenMP_CXX) - set(${DEPENDENCIES} "${${DEPENDENCIES}}" PARENT_SCOPE) + set(${DEPENDENCIES} "${${DEPENDENCIES}}" PARENT_SCOPE) target_compile_definitions(${TARGET_NAME} PRIVATE HAS_OPENMP) elseif (CUDAQ_REQUIRE_OPENMP) - message(FATAL_ERROR "OpenMP not found.") + message(FATAL_ERROR "OpenMP not found or compiler rejects OpenMP flags.") endif() endfunction() # If OpenMP is enabled and found, adds the necessary compile definitions to the # interface dependencies of the given target. function(add_openmp_interface_definitions TARGET_NAME) - find_package(OpenMP) - if(OpenMP_CXX_FOUND) + _cudaq_check_openmp_usable(_openmp_usable) + if(_openmp_usable) message(STATUS "OpenMP Found. 
Adding interface definitions to target ${TARGET_NAME}.") target_compile_definitions(${TARGET_NAME} INTERFACE HAS_OPENMP) elseif (CUDAQ_REQUIRE_OPENMP) - message(FATAL_ERROR "OpenMP not found.") + message(FATAL_ERROR "OpenMP not found or compiler rejects OpenMP flags.") endif() endfunction() diff --git a/docker/build/assets.Dockerfile b/docker/build/assets.Dockerfile index 58c83cd9555..2d4f4d65ff3 100644 --- a/docker/build/assets.Dockerfile +++ b/docker/build/assets.Dockerfile @@ -58,6 +58,7 @@ ADD tpls/customizations/llvm /cuda-quantum/tpls/customizations/llvm ADD .gitmodules /cuda-quantum/.gitmodules ADD .git/modules/tpls/pybind11/HEAD /.git_modules/tpls/pybind11/HEAD ADD .git/modules/tpls/llvm/HEAD /.git_modules/tpls/llvm/HEAD +ADD .git/modules/tpls/nanobind/HEAD /.git_modules/tpls/nanobind/HEAD # This is a hack so that we do not need to rebuild the prerequisites # whenever we pick up a new CUDA-Q commit (which is always in CI). @@ -72,7 +73,7 @@ RUN cd /cuda-quantum && git init && \ fi; \ done && git submodule init && git submodule RUN cd /cuda-quantum && source scripts/configure_build.sh && \ - LLVM_PROJECTS='clang;flang;lld;mlir;openmp;runtimes' \ + LLVM_PROJECTS='clang;flang;lld;mlir;openmp;runtimes' BOOTSTRAP_LLVM=true \ bash scripts/install_prerequisites.sh -t llvm -e qrmi # Validate that the built toolchain and libraries have no GCC dependencies. @@ -250,7 +251,7 @@ RUN cd /cuda-quantum && \ bash scripts/install_prerequisites.sh -t llvm -e qrmi && \ CC="$LLVM_INSTALL_PREFIX/bin/clang" \ CXX="$LLVM_INSTALL_PREFIX/bin/clang++" \ - FC="$LLVM_INSTALL_PREFIX/bin/flang-new" \ + FC="$LLVM_INSTALL_PREFIX/bin/flang" \ python3 -m build --wheel && \ echo "=== ccache stats (python_build) ===" && (ccache -s 2>/dev/null || true) ## [ to inject a pre-populated cache, # while the devcontainer builds get the scratch as a noop. 
diff --git a/docker/build/devcontainer.Dockerfile b/docker/build/devcontainer.Dockerfile index 4100c90e340..5e6fdc10bdc 100644 --- a/docker/build/devcontainer.Dockerfile +++ b/docker/build/devcontainer.Dockerfile @@ -17,7 +17,7 @@ # docker build -t ghcr.io/nvidia/cuda-quantum-devdeps:ext -f docker/build/devdeps.ext.Dockerfile . ARG cuda_version=12.6 -ARG base_image=ghcr.io/nvidia/cuda-quantum-devdeps:gcc11-main +ARG base_image=ghcr.io/nvidia/cuda-quantum-devdeps:gcc12-main ARG ompidev_image=ghcr.io/nvidia/cuda-quantum-devdeps:cu12-ompi-main FROM $ompidev_image AS ompibuild ARG cuda_version diff --git a/docker/build/devdeps.Dockerfile b/docker/build/devdeps.Dockerfile index 82068b89b88..78f9f957f43 100644 --- a/docker/build/devdeps.Dockerfile +++ b/docker/build/devdeps.Dockerfile @@ -6,21 +6,20 @@ # the terms of the Apache License 2.0 which accompanies this distribution. # # ============================================================================ # -# This file builds the development environment that contains the necessary development -# dependencies for building and testing CUDA-Q. This does not include the CUDA, OpenMPI +# This file builds the development environment that contains the necessary development +# dependencies for building and testing CUDA-Q. This does not include the CUDA, OpenMPI # and other dependencies that some of the simulator backends require. These backends # will be omitted from the build if this environment is used. # # Usage: # Must be built from the repo root with: -# docker build -t ghcr.io/nvidia/cuda-quantum-devdeps:${toolchain}-latest -f docker/build/devdeps.Dockerfile --build-arg toolchain=$toolchain . +# docker build -t ghcr.io/nvidia/cuda-quantum-devdeps:llvm-latest -f docker/build/devdeps.Dockerfile . # -# The variable $toolchain indicates which compiler toolchain to build the LLVM libraries with. +# The variable $toolchain indicates which compiler toolchain to build the LLVM libraries with. 
# The toolchain used to build the LLVM binaries that CUDA-Q depends on must be used to build -# CUDA-Q. This image sets the CC and CXX environment variables to use that toolchain. -# Currently, clang16, clang15, gcc12, and gcc11 are supported. To use a different -# toolchain, add support for it to the install_toolchain.sh script. If the toolchain is set to llvm, -# then the toolchain will be built from source. +# CUDA-Q. Currently, the $toolchain argument is a no-op; the bootstrap always uses clang. +# Support for gcc12 (and potentially other toolchains) may be added back in the future. +# To use a different toolchain, add support for it to the install_toolchain.sh script. # [Operating System] ARG base_image=ubuntu:24.04 @@ -28,10 +27,10 @@ ARG base_image=ubuntu:24.04 # [CUDA-Q Dependencies] FROM ${base_image} AS prereqs SHELL ["/bin/bash", "-c"] -ARG toolchain=gcc11 +ARG toolchain=llvm # When a dialogue box would be needed during install, assume default configurations. -# Set here to avoid setting it for all install commands. +# Set here to avoid setting it for all install commands. # Given as arg to make sure that this value is only set during build but not in the launched container. 
ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && \ @@ -51,30 +50,29 @@ ENV ZLIB_INSTALL_PREFIX=/usr/local/zlib ENV OPENSSL_INSTALL_PREFIX=/usr/local/openssl ENV CURL_INSTALL_PREFIX=/usr/local/curl ENV AWS_INSTALL_PREFIX=/usr/local/aws +ENV NANOBIND_INSTALL_PREFIX=/usr/local/nanobind # TODO: eliminate the need for this ENV PIP_BREAK_SYSTEM_PACKAGES=1 ## [Build Dependencies] RUN apt-get update && apt-get install -y --no-install-recommends \ - wget git unzip \ + wget git unzip ccache \ + libstdc++-13-dev \ python3-dev python3-pip && \ python3 -m pip install --no-cache-dir numpy --break-system-packages && \ apt-get autoremove -y --purge && apt-get clean && rm -rf /var/lib/apt/lists/* +ADD scripts/configure_build.sh /cuda-quantum/scripts/configure_build.sh ADD scripts/install_toolchain.sh /cuda-quantum/scripts/install_toolchain.sh -RUN source /cuda-quantum/scripts/install_toolchain.sh \ - -e "$LLVM_INSTALL_PREFIX/bootstrap" -t ${toolchain} - -## [Source Dependencies] -ADD scripts/install_prerequisites.sh /cuda-quantum/scripts/install_prerequisites.sh ADD scripts/build_llvm.sh /cuda-quantum/scripts/build_llvm.sh ADD cmake/caches/LLVM.cmake /cuda-quantum/cmake/caches/LLVM.cmake ADD tpls/customizations/llvm /cuda-quantum/tpls/customizations/llvm ADD .gitmodules /cuda-quantum/.gitmodules ADD .git/modules/tpls/pybind11/HEAD /.git_modules/tpls/pybind11/HEAD ADD .git/modules/tpls/llvm/HEAD /.git_modules/tpls/llvm/HEAD +ADD .git/modules/tpls/nanobind/HEAD /.git_modules/tpls/nanobind/HEAD -# This is initializing the .git index sufficiently so that we can -# check out the correct commits based on the submodule commit. +# This is initializing the .git index sufficiently so that we can +# check out the correct commits based on the submodule commit. 
RUN cd /cuda-quantum && git init && \ git config -f .gitmodules --get-regexp '^submodule\..*\.path$' | \ while read path_key local_path; do \ @@ -85,9 +83,15 @@ RUN cd /cuda-quantum && git init && \ $(cat /.git_modules/$local_path/HEAD) $local_path; \ fi; \ done && git submodule init && git submodule -# Build compiler-rt (only) since it is needed for code coverage tools -RUN LLVM_PROJECTS='clang;lld;mlir;python-bindings;compiler-rt' \ - bash /cuda-quantum/scripts/install_prerequisites.sh -t ${toolchain} + +## [Source Dependencies] +ADD scripts/bootstrap_prerequisites.sh /cuda-quantum/scripts/bootstrap_prerequisites.sh +RUN apt-get update && apt-get install -y --no-install-recommends clang lld && \ + CC=clang CXX=clang++ \ + LLVM_PROJECTS='clang;flang;lld;mlir;python-bindings;compiler-rt' \ + bash /cuda-quantum/scripts/bootstrap_prerequisites.sh && \ + (apt-get remove -y clang lld || true) && apt-get autoremove -y --purge && \ + apt-get clean && rm -rf /var/lib/apt/lists/* ## [Dev Dependencies] RUN if [ "$(uname -m)" == "x86_64" ]; then \ @@ -120,18 +124,8 @@ COPY --from=prereqs /usr/local/llvm /usr/local/llvm ENV LLVM_INSTALL_PREFIX=/usr/local/llvm ENV PATH="$PATH:$LLVM_INSTALL_PREFIX/bin/" -# Install the C/C++ compiler toolchain with which the LLVM dependencies have -# been built. CUDA-Q needs to be built with that same toolchain. We use -# a wrapper script so that the path that we set CC and CXX to is independent -# on the installed toolchain. Unfortunately, a symbolic link won't work. -# Using update-alternatives for c++ and cc could maybe be a better option. 
-RUN source "$LLVM_INSTALL_PREFIX/bootstrap/init_command.sh" \ - && echo -e '#!/bin/bash\n"'$CC'" "$@"' > "$LLVM_INSTALL_PREFIX/bootstrap/cc" \ - && echo -e '#!/bin/bash\n"'$CXX'" "$@"' > "$LLVM_INSTALL_PREFIX/bootstrap/cxx" \ - && chmod +x "$LLVM_INSTALL_PREFIX/bootstrap/cc" \ - && chmod +x "$LLVM_INSTALL_PREFIX/bootstrap/cxx" -ENV CC="$LLVM_INSTALL_PREFIX/bootstrap/cc" -ENV CXX="$LLVM_INSTALL_PREFIX/bootstrap/cxx" +ENV CC="$LLVM_INSTALL_PREFIX/bin/clang" +ENV CXX="$LLVM_INSTALL_PREFIX/bin/clang++" # Copy over additional prerequisites. ENV BLAS_INSTALL_PREFIX=/usr/local/blas @@ -139,6 +133,8 @@ ENV ZLIB_INSTALL_PREFIX=/usr/local/zlib ENV OPENSSL_INSTALL_PREFIX=/usr/local/openssl ENV CURL_INSTALL_PREFIX=/usr/local/curl ENV AWS_INSTALL_PREFIX=/usr/local/aws +ENV NANOBIND_INSTALL_PREFIX=/usr/local/nanobind +COPY --from=prereqs /usr/local/nanobind "$NANOBIND_INSTALL_PREFIX" COPY --from=prereqs /usr/local/blas "$BLAS_INSTALL_PREFIX" COPY --from=prereqs /usr/local/zlib "$ZLIB_INSTALL_PREFIX" COPY --from=prereqs /usr/local/openssl "$OPENSSL_INSTALL_PREFIX" @@ -155,7 +151,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y wget ca-certifi ENV PATH="${PATH}:/usr/local/cmake-3.28/bin" COPY requirements-dev.txt /cuda-quantum/requirements-dev.txt RUN apt-get update && apt-get install -y --no-install-recommends \ - git gdb ninja-build file lldb ccache \ + git gdb ninja-build file lldb ccache libatomic1 \ + libstdc++-13-dev \ python3 python3-pip libpython3-dev \ && python3 -m pip install --no-cache-dir --break-system-packages \ -r /cuda-quantum/requirements-dev.txt \ diff --git a/docker/build/devdeps.manylinux.Dockerfile b/docker/build/devdeps.manylinux.Dockerfile index 4d5d321d80e..81b161ad515 100644 --- a/docker/build/devdeps.manylinux.Dockerfile +++ b/docker/build/devdeps.manylinux.Dockerfile @@ -16,7 +16,7 @@ # The variable $toolchain indicates which compiler toolchain to build the LLVM libraries with. 
# The toolchain used to build the LLVM binaries that CUDA-Q depends on must be used to build # CUDA-Q. This image sets the CC and CXX environment variables to use that toolchain. -# Currently, clang16 and gcc11, gcc12, and gcc13 are supported. +# Currently, gcc12 and gcc13 are supported. # There are currently no multi-platform manylinux images available. # See https://github.com/pypa/manylinux/issues/1306. @@ -26,7 +26,7 @@ FROM ${base_image} ARG distro=rhel8 ARG llvm_commit ARG pybind11_commit -ARG toolchain=gcc11 +ARG toolchain=gcc12 # When a dialogue box would be needed during install, assume default configurations. # Set here to avoid setting it for all install commands. @@ -53,9 +53,6 @@ RUN if [ "${toolchain#gcc}" != "$toolchain" ]; then \ enable_script=`find / -path '*gcc*' -path '*'$gcc_version'*' -name enable` && . "$enable_script"; \ fi && \ CC="$(which gcc)" && CXX="$(which g++)"; \ - elif [ "$toolchain" == 'clang16' ]; then \ - dnf install -y --nobest --setopt=install_weak_deps=False clang-16.0.6 && \ - CC="$(which clang-16)" && CXX="$(which clang++-16)"; \ else echo "Toolchain not supported." 
&& exit 1; \ fi && dnf clean all \ && mkdir -p "$LLVM_INSTALL_PREFIX/bootstrap" \ @@ -88,7 +85,7 @@ RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.28.4/cmake-3.2 ADD ./scripts/build_llvm.sh /scripts/build_llvm.sh ADD ./cmake/caches/LLVM.cmake /cmake/caches/LLVM.cmake ADD ./tpls/customizations/llvm/ /tpls/customizations/llvm/ -RUN LLVM_PROJECTS='clang;mlir' LLVM_SOURCE=/llvm-project \ +RUN LLVM_PROJECTS='clang;lld;mlir' LLVM_SOURCE=/llvm-project \ LLVM_CMAKE_CACHE=/cmake/caches/LLVM.cmake \ LLVM_CMAKE_PATCHES=/tpls/customizations/llvm \ bash /scripts/build_llvm.sh -c Release -v diff --git a/docker/release/cudaq.wheel.Dockerfile b/docker/release/cudaq.wheel.Dockerfile index b1d47ba7c09..d3e9956f375 100644 --- a/docker/release/cudaq.wheel.Dockerfile +++ b/docker/release/cudaq.wheel.Dockerfile @@ -18,7 +18,7 @@ # - https://github.com/numpy/numpy/blob/main/pyproject.toml, and # - https://github.com/numpy/numpy/blob/main/.github/workflows/wheels.yml -ARG base_image=ghcr.io/nvidia/cuda-quantum-devdeps:manylinux-amd64-cu12.6-gcc11-main +ARG base_image=ghcr.io/nvidia/cuda-quantum-devdeps:manylinux-amd64-cu12.6-gcc12-main # Default empty stage for ccache data. CI overrides this with # --build-context ccache-data= to inject a pre-populated cache, # while local/devcontainer builds get a harmless no-op (empty scratch). 
@@ -50,12 +50,12 @@ RUN --mount=from=ccache-data,target=/tmp/ccache-import,rw \ mkdir -p /root/.ccache; \ fi RUN echo "Building MLIR bindings for python${python_version}" && \ - CCACHE_DISABLE=1 python${python_version} -m pip install --no-cache-dir numpy && \ + CCACHE_DISABLE=1 python${python_version} -m pip install --no-cache-dir numpy "nanobind>=2.9.0" && \ rm -rf "$LLVM_INSTALL_PREFIX/src" "$LLVM_INSTALL_PREFIX/python_packages" && \ Python3_EXECUTABLE="$(which python${python_version})" \ - LLVM_PROJECTS='clang;mlir;python-bindings' \ + LLVM_PROJECTS='clang;lld;mlir;python-bindings' \ LLVM_CMAKE_CACHE=/cmake/caches/LLVM.cmake LLVM_SOURCE=/llvm-project \ - bash /scripts/build_llvm.sh -c Release -v + bash /scripts/build_llvm.sh -c Release -v # Build wheel using unified wheel build script RUN cd /cuda-quantum && \ diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index a9c1db77491..c2b9fd93f79 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -243,8 +243,14 @@ def setup(app): ('cpp:identifier', 'cudaq::detail::EigenSparseMatrix'), ('cpp:identifier', 'detail'), ('cpp:identifier', 'detail::NoisePoint'), + # nanobind generates RST function directives whose description text gets + # misinterpreted as py:class cross-references in nitpick mode + ('py:class', 'Convert spin_op to JSON string'), + ('py:class', 'Checks if all operators in the product are the identity. 
Note'), ] +suppress_warnings = ["myst.duplicate_def"] + napoleon_google_docstring = True napoleon_numpy_docstring = False autosectionlabel_prefix_document = True diff --git a/include/cudaq/Frontend/nvqpp/ASTBridge.h b/include/cudaq/Frontend/nvqpp/ASTBridge.h index 69294759fb0..a571d50dc94 100644 --- a/include/cudaq/Frontend/nvqpp/ASTBridge.h +++ b/include/cudaq/Frontend/nvqpp/ASTBridge.h @@ -15,6 +15,7 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/Mangle.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/Analysis/CallGraph.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" @@ -371,33 +372,39 @@ class QuakeBridgeVisitor // Type nodes to lower to Quake. //===--------------------------------------------------------------------===// - bool TraverseTypedefType(clang::TypedefType *t) { + bool TraverseTypedefType(clang::TypedefType *t, bool &visitChildren) { return TraverseType(t->desugar()); } - bool TraverseTypedefTypeLoc(clang::TypedefTypeLoc tl) { + bool TraverseTypedefTypeLoc(clang::TypedefTypeLoc tl, bool &visitChildren) { return TraverseType(tl.getType()); } - bool TraverseUsingType(clang::UsingType *t) { + bool TraverseUsingType(clang::UsingType *t, bool &visitChildren) { return TraverseType(t->desugar()); } - bool TraverseUsingTypeLoc(clang::UsingTypeLoc tl) { + bool TraverseUsingTypeLoc(clang::UsingTypeLoc tl, bool &visitChildren) { return TraverseType(tl.getType()); } - bool - TraverseTemplateSpecializationType(clang::TemplateSpecializationType *t) { + bool TraverseTemplateSpecializationType(clang::TemplateSpecializationType *t, + bool &visitChildren) { return TraverseType(t->desugar()); } - bool TraverseTypeOfExprType(clang::TypeOfExprType *t) { + bool TraverseTypeOfExprType(clang::TypeOfExprType *t, bool &visitChildren) { // Do not visit the expression as it is has no semantics other than for // inferring a type. 
return TraverseType(t->desugar()); } - bool TraverseNestedNameSpecifier(clang::NestedNameSpecifier *) { - return true; + bool TraverseNestedNameSpecifier(clang::NestedNameSpecifier) { return true; } + bool TraverseDecltypeType(clang::DecltypeType *t, bool &visitChildren) { + return TraverseType(t->desugar()); } - bool TraverseDecltypeType(clang::DecltypeType *t) { + bool TraversePredefinedSugarType(clang::PredefinedSugarType *t, + bool &visitChildren) { return TraverseType(t->desugar()); } + bool TraversePredefinedSugarTypeLoc(clang::PredefinedSugarTypeLoc tl, + bool &visitChildren) { + return TraverseType(tl.getType()); + } // When processing a record type, visit the type of all the field decls. This // will push 1 new type on the stack for each field. These types will be the @@ -413,7 +420,7 @@ class QuakeBridgeVisitor return Base::WalkUpFromFieldDecl(x); } - bool TraverseRecordType(clang::RecordType *t); + bool TraverseRecordType(clang::RecordType *t, bool &visitChildren); bool interceptRecordDecl(clang::RecordDecl *x); std::pair getWidthAndAlignment(clang::RecordDecl *x); bool VisitRecordDecl(clang::RecordDecl *x); @@ -468,9 +475,10 @@ class QuakeBridgeVisitor mlir::Value loadLValue(mlir::Value val) { auto valTy = val.getType(); if (isa(valTy)) - return builder.create(val.getLoc(), val); + return cudaq::cc::LoadOp::create(builder, val.getLoc(), val); if (isa(valTy)) - return builder.create(val.getLoc(), val); + return mlir::LLVM::LoadOp::create(builder, val.getLoc(), + builder.getI8Type(), val); return val; } @@ -789,7 +797,7 @@ inline bool isInNamespace(const clang::Decl *x, mlir::StringRef nsName) { do { if (const auto *nsd = dyn_cast(declCtx)) if (const auto *nsi = nsd->getIdentifier()) - if (nsi->getName().equals(nsName)) + if (nsi->getName() == nsName) return true; declCtx = declCtx->getParent(); } while (declCtx); @@ -804,7 +812,7 @@ inline bool isInClassInNamespace(const clang::Decl *x, assert(x && "decl is null"); if (const auto *cld = 
dyn_cast(x->getDeclContext())) if (const auto *cli = cld->getIdentifier()) - return cli->getName().equals(className) && isInNamespace(cld, nsName); + return (cli->getName() == className) && isInNamespace(cld, nsName); return false; } diff --git a/include/cudaq/Frontend/nvqpp/QisBuilder.h b/include/cudaq/Frontend/nvqpp/QisBuilder.h index 489dc39873f..078b853e869 100644 --- a/include/cudaq/Frontend/nvqpp/QisBuilder.h +++ b/include/cudaq/Frontend/nvqpp/QisBuilder.h @@ -8,7 +8,6 @@ #pragma once -#include "llvm/Support/Registry.h" #include "mlir/IR/Builders.h" namespace nvqpp { @@ -21,6 +20,4 @@ class QISBuilder { mlir::ValueRange general_operands) = 0; }; -using QISBuilderRegistry = llvm::Registry; - } // namespace nvqpp diff --git a/include/cudaq/Optimizer/Builder/Factory.h b/include/cudaq/Optimizer/Builder/Factory.h index 7956aa68e21..73783252c78 100644 --- a/include/cudaq/Optimizer/Builder/Factory.h +++ b/include/cudaq/Optimizer/Builder/Factory.h @@ -19,6 +19,10 @@ #include #include +namespace llvm { +class DataLayout; +} + namespace quake { class StateType; } @@ -65,7 +69,7 @@ inline mlir::Type getCharType(mlir::MLIRContext *ctx) { /// Return the LLVM-IR dialect `ptr` type. inline mlir::Type getPointerType(mlir::MLIRContext *ctx) { - return mlir::LLVM::LLVMPointerType::get(getCharType(ctx)); + return mlir::LLVM::LLVMPointerType::get(ctx); } /// The type of a dynamic buffer as returned via the runtime. @@ -79,9 +83,9 @@ inline mlir::Type getOpaquePointerType(mlir::MLIRContext *ctx) { return mlir::LLVM::LLVMPointerType::get(ctx, /*addressSpace=*/0); } -/// Return the LLVM-IR dialect type: `ty*`. +/// Return the LLVM-IR dialect type: `ptr`. (changed for modern LLVM.) 
inline mlir::Type getPointerType(mlir::Type ty) { - return mlir::LLVM::LLVMPointerType::get(ty); + return factory::getPointerType(ty.getContext()); } cudaq::cc::PointerType getIndexedObjectType(mlir::Type eleTy); @@ -163,7 +167,7 @@ inline mlir::LLVM::ConstantOp genLlvmI32Constant(mlir::Location loc, std::int32_t val) { auto idx = builder.getI32IntegerAttr(val); auto i32Ty = builder.getI32Type(); - return builder.create(loc, i32Ty, idx); + return mlir::LLVM::ConstantOp::create(builder, loc, i32Ty, idx); } inline mlir::LLVM::ConstantOp genLlvmI64Constant(mlir::Location loc, @@ -171,14 +175,14 @@ inline mlir::LLVM::ConstantOp genLlvmI64Constant(mlir::Location loc, std::int64_t val) { auto idx = builder.getI64IntegerAttr(val); auto i64Ty = builder.getI64Type(); - return builder.create(loc, i64Ty, idx); + return mlir::LLVM::ConstantOp::create(builder, loc, i64Ty, idx); } inline mlir::Value createFloatConstant(mlir::Location loc, mlir::OpBuilder &builder, llvm::APFloat value, mlir::FloatType type) { - return builder.create(loc, value, type); + return mlir::arith::ConstantFloatOp::create(builder, loc, type, value); } inline mlir::Value createFloatConstant(mlir::Location loc, @@ -220,11 +224,16 @@ inline mlir::Block *addEntryBlock(mlir::LLVM::GlobalOp initVar) { /// Return an i64 array where element `k` is `N` if the /// operand `k` is `veq` and 0 otherwise. +/// \p originalControls contains the pre-conversion quake control values, +/// used to distinguish `veq` from ref types (necessary with opaque pointers +/// where both convert to the same !llvm.ptr type). 
mlir::Value packIsArrayAndLengthArray(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, mlir::ModuleOp parentModule, std::size_t numOperands, - mlir::ValueRange operands); + mlir::ValueRange operands, + mlir::ValueRange originalControls); + mlir::FlatSymbolRefAttr createLLVMFunctionSymbol(mlir::StringRef name, mlir::Type retType, mlir::ArrayRef inArgTypes, diff --git a/include/cudaq/Optimizer/Builder/Intrinsics.h b/include/cudaq/Optimizer/Builder/Intrinsics.h index e731e836c0d..20a3cadee98 100644 --- a/include/cudaq/Optimizer/Builder/Intrinsics.h +++ b/include/cudaq/Optimizer/Builder/Intrinsics.h @@ -19,9 +19,8 @@ class GlobalOp; /// calls will be erased before code gen. static constexpr const char stdMoveBuiltin[] = ".std::move"; -static constexpr const char llvmMemCopyIntrinsic[] = - "llvm.memcpy.p0i8.p0i8.i64"; -static constexpr const char llvmMemSetIntrinsic[] = "llvm.memset.p0i8.i64"; +static constexpr const char llvmMemCopyIntrinsic[] = "llvm.memcpy.p0.p0.i64"; +static constexpr const char llvmMemSetIntrinsic[] = "llvm.memset.p0.i64"; // cudaq::range(count); static constexpr const char setCudaqRangeVector[] = "__nvqpp_CudaqRangeInit"; diff --git a/include/cudaq/Optimizer/CAPI/Dialects.h b/include/cudaq/Optimizer/CAPI/Dialects.h index 251d805d638..9abb3df8f69 100644 --- a/include/cudaq/Optimizer/CAPI/Dialects.h +++ b/include/cudaq/Optimizer/CAPI/Dialects.h @@ -8,6 +8,7 @@ #pragma once +#include "mlir/CAPI/IR.h" #include "mlir/CAPI/Registration.h" #ifdef __cplusplus @@ -17,6 +18,9 @@ extern "C" { MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(Quake, quake); MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(CC, cc); +// Register Quake, CC, and all upstream MLIR dialects into `context`. 
+MLIR_CAPI_EXPORTED void cudaqRegisterAllDialects(MlirContext context); + #ifdef __cplusplus } #endif diff --git a/include/cudaq/Optimizer/CodeGen/CodeGenDialect.td b/include/cudaq/Optimizer/CodeGen/CodeGenDialect.td index b94b977633d..fcefa08f754 100644 --- a/include/cudaq/Optimizer/CodeGen/CodeGenDialect.td +++ b/include/cudaq/Optimizer/CodeGen/CodeGenDialect.td @@ -24,7 +24,6 @@ def CodeGenDialect : Dialect { let cppNamespace = "cudaq::codegen"; let useDefaultTypePrinterParser = 1; - let useFoldAPI = kEmitFoldAdaptorFolder; let extraClassDeclaration = [{ void registerTypes(); // register at least a bogo type. diff --git a/include/cudaq/Optimizer/CodeGen/Passes.h b/include/cudaq/Optimizer/CodeGen/Passes.h index e36c350711b..b6bd4a5e6ef 100644 --- a/include/cudaq/Optimizer/CodeGen/Passes.h +++ b/include/cudaq/Optimizer/CodeGen/Passes.h @@ -13,6 +13,10 @@ /// particular quantum target representation. There is a bevy of such targets /// that provide platforms on which the quantum code can be run. +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassRegistry.h" @@ -60,10 +64,10 @@ mlir::LLVM::LLVMStructType lambdaAsPairOfPointers(mlir::MLIRContext *context); /// before conversion to the LLVM-IR dialect. void registerToQIRAPIPipeline(); -/// Add the convert to QIR API pipeline to \p pm. We don't use opaque pointers -/// yet, so provide a convenient overload. +/// Add the convert to QIR API pipeline to \p pm. With the move to LLVM 22, we +/// now use opaque pointers. void addConvertToQIRAPIPipeline(mlir::OpPassManager &pm, mlir::StringRef api, - bool opaquePtr = false); + bool opaquePtr = true); /// The pipeline for lowering Quake code to the execution manager API. This /// pipeline should be run before conversion to the LLVM-IR dialect. 
diff --git a/include/cudaq/Optimizer/CodeGen/Passes.td b/include/cudaq/Optimizer/CodeGen/Passes.td index 598e9d2c7c0..8d072551cc1 100644 --- a/include/cudaq/Optimizer/CodeGen/Passes.td +++ b/include/cudaq/Optimizer/CodeGen/Passes.td @@ -58,7 +58,9 @@ def ConvertToQIR : Pass<"quake-to-qir", "mlir::ModuleOp"> { }]; let dependentDialects = [ - "cudaq::codegen::CodeGenDialect", "mlir::LLVM::LLVMDialect" + "cudaq::codegen::CodeGenDialect", "mlir::LLVM::LLVMDialect", + "mlir::arith::ArithDialect", "mlir::complex::ComplexDialect", + "cudaq::cc::CCDialect" ]; } @@ -136,6 +138,7 @@ def QIRToQIRProfile : Pass<"convert-to-qir-profile"> { "Which QIR profile to convert to (default is 'qir-base')"> ]; + let dependentDialects = ["mlir::LLVM::LLVMDialect"]; let constructor = "cudaq::opt::createQIRToQIRProfilePass(\"qir-base\")"; } @@ -172,7 +175,8 @@ def WireSetToProfileQIR : Pass<"wireset-to-profile-qir", "mlir::func::FuncOp"> { the code to CC dialect with QIR calls, etc. }]; - let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect"]; + let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect", + "mlir::func::FuncDialect", "mlir::arith::ArithDialect"]; let options = [ Option<"convertTo", "convert-to", "std::string", /*default=*/"\"qir-base\"", "Select the profile to convert wire sets to."> @@ -220,7 +224,7 @@ def QuakeToQIRAPI : Pass<"quake-to-qir-api"> { let options = [ Option<"api", "api", "std::string", /*default=*/"\"full\"", "Select the QIR API to use.">, - Option<"opaquePtr", "opaque-pointer", "bool", /*default=*/"false", + Option<"opaquePtr", "opaque-pointer", "bool", /*default=*/"true", "Use opaque pointers."> ]; } @@ -250,7 +254,7 @@ def QuakeToQIRAPIPrep : Pass<"quake-to-qir-api-prep", "mlir::ModuleOp"> { let options = [ Option<"api", "api", "std::string", /*default=*/"\"full\"", "Select the QIR API to use.">, - Option<"opaquePtr", "opaque-pointer", "bool", /*default=*/"false", + Option<"opaquePtr", "opaque-pointer", "bool", 
/*default=*/"true", "Use opaque pointers."> ]; } diff --git a/include/cudaq/Optimizer/CodeGen/Peephole.h b/include/cudaq/Optimizer/CodeGen/Peephole.h index e829cdb5f7a..f8fd0493127 100644 --- a/include/cudaq/Optimizer/CodeGen/Peephole.h +++ b/include/cudaq/Optimizer/CodeGen/Peephole.h @@ -16,9 +16,9 @@ #include "mlir/Support/LLVM.h" inline bool needsToBeRenamed(mlir::StringRef name) { - return name.startswith(cudaq::opt::QIRQISPrefix) && - !name.endswith("__body") && !name.endswith("__adj") && - !name.endswith("__ctl"); + return name.starts_with(cudaq::opt::QIRQISPrefix) && + !name.ends_with("__body") && !name.ends_with("__adj") && + !name.ends_with("__ctl"); } inline bool callToInvokeWithXCtrlOneTarget(mlir::StringRef callee, @@ -26,7 +26,7 @@ inline bool callToInvokeWithXCtrlOneTarget(mlir::StringRef callee, if ((args.size() == 4) && (callee == cudaq::opt::NVQIRInvokeWithControlBits)) if (auto addrOf = dyn_cast_or_null( args[1].getDefiningOp())) { - return addrOf.getGlobalName().startswith( + return addrOf.getGlobalName().starts_with( std::string(cudaq::opt::QIRQISPrefix) + "x__ctl"); } return false; @@ -41,14 +41,14 @@ static constexpr char resultIndexName[] = "result.index"; inline mlir::Value createMeasureCall(mlir::PatternRewriter &builder, mlir::Location loc, mlir::LLVM::CallOp op, mlir::ValueRange args) { - auto ptrTy = cudaq::opt::getResultType(builder.getContext()); + auto ptrTy = cudaq::cg::getLLVMResultType(builder.getContext()); if (auto intAttr = dyn_cast_or_null(op->getAttr(resultIndexName))) { - auto constOp = builder.create(loc, intAttr); - auto cast = builder.create(loc, ptrTy, constOp); - builder.create( - loc, mlir::TypeRange{}, cudaq::opt::QIRMeasureBody, - mlir::ArrayRef{args[0], cast}); + mlir::Value constOp = mlir::LLVM::ConstantOp::create(builder, loc, intAttr); + auto cast = mlir::LLVM::IntToPtrOp::create(builder, loc, ptrTy, constOp); + mlir::LLVM::CallOp::create(builder, loc, mlir::TypeRange{}, + cudaq::opt::QIRMeasureBody, + 
mlir::ArrayRef{args[0], cast}); return cast; } op.emitError("mz op must have an associated result index."); @@ -60,9 +60,8 @@ inline mlir::Value createReadResultCall(mlir::PatternRewriter &builder, mlir::Value result) { // NB: This code is only used from a deprecated pass. auto i1Ty = mlir::IntegerType::get(builder.getContext(), 1); - return builder - .create(loc, mlir::TypeRange{i1Ty}, - cudaq::opt::qir0_1::ReadResultBody, - mlir::ArrayRef{result}) + return mlir::LLVM::CallOp::create(builder, loc, mlir::TypeRange{i1Ty}, + cudaq::opt::qir0_1::ReadResultBody, + mlir::ArrayRef{result}) .getResult(); } diff --git a/include/cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h b/include/cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h index aca0cc5d2ba..4dfd71ce612 100644 --- a/include/cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h +++ b/include/cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h @@ -11,6 +11,7 @@ /// This file provides the opaque struct types to be used with the obsolete LLVM /// typed pointer type. +#include "cudaq/Optimizer/Builder/Factory.h" #include "cudaq/Optimizer/Dialect/CC/CCTypes.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" @@ -22,74 +23,65 @@ inline mlir::Type getQuantumTypeByName(mlir::StringRef type, } namespace opt { - -// The following type creators are deprecated and should only be used in the -// older codegen passes. Use the creators in the cg namespace immediately below -// instead. 
-inline mlir::Type getOpaquePointerType(mlir::MLIRContext *context) { - return mlir::LLVM::LLVMPointerType::get(context); -} - -inline mlir::Type getQubitType(mlir::MLIRContext *context) { - return mlir::LLVM::LLVMPointerType::get( - getQuantumTypeByName("Qubit", context)); -} - -inline mlir::Type getArrayType(mlir::MLIRContext *context) { - return mlir::LLVM::LLVMPointerType::get( - getQuantumTypeByName("Array", context)); -} - -inline mlir::Type getResultType(mlir::MLIRContext *context) { - return mlir::LLVM::LLVMPointerType::get( - getQuantumTypeByName("Result", context)); -} - -inline mlir::Type getCharPointerType(mlir::MLIRContext *context) { - return mlir::LLVM::LLVMPointerType::get(mlir::IntegerType::get(context, 8)); -} - void initializeTypeConversions(mlir::LLVMTypeConverter &typeConverter); - } // namespace opt namespace cg { -// The following type creators replace the ones above. They are configurable on -// the fly to either use opaque structs or opaque pointers. The default is to -// use pointers to opaque structs, which is no longer supported in modern LLVM. +// These type creators are configurable on the fly to either use opaque structs +// or opaque pointers. The default is to use opaque pointers, which are the +// default in any modern LLVM version. 
inline mlir::Type getOpaquePointerType(mlir::MLIRContext *context) { return cc::PointerType::get(mlir::NoneType::get(context)); } inline mlir::Type getQubitType(mlir::MLIRContext *context, - bool useOpaquePtr = false) { + bool useOpaquePtr = true) { if (useOpaquePtr) return getOpaquePointerType(context); return cc::PointerType::get(getQuantumTypeByName("Qubit", context)); } inline mlir::Type getArrayType(mlir::MLIRContext *context, - bool useOpaquePtr = false) { + bool useOpaquePtr = true) { if (useOpaquePtr) return getOpaquePointerType(context); return cc::PointerType::get(getQuantumTypeByName("Array", context)); } inline mlir::Type getResultType(mlir::MLIRContext *context, - bool useOpaquePtr = false) { + bool useOpaquePtr = true) { if (useOpaquePtr) return getOpaquePointerType(context); return cc::PointerType::get(getQuantumTypeByName("Result", context)); } inline mlir::Type getCharPointerType(mlir::MLIRContext *context, - bool useOpaquePtr = false) { + bool useOpaquePtr = true) { if (useOpaquePtr) return getOpaquePointerType(context); return cc::PointerType::get(mlir::IntegerType::get(context, 8)); } +// LLVM Types: +// The factory builder will build opaque pointers for modern MLIR. 
+ +inline mlir::Type getLLVMQubitType(mlir::MLIRContext *context) { + return opt::factory::getPointerType(getQuantumTypeByName("Qubit", context)); +} + +inline mlir::Type getLLVMArrayType(mlir::MLIRContext *context) { + return opt::factory::getPointerType(getQuantumTypeByName("Array", context)); +} + +inline mlir::Type getLLVMResultType(mlir::MLIRContext *context) { + return opt::factory::getPointerType(getQuantumTypeByName("Result", context)); +} + +inline mlir::Type getLLVMCharPointerType(mlir::MLIRContext *context) { + return opt::factory::getPointerType(mlir::IntegerType::get(context, 8)); +} + } // namespace cg } // namespace cudaq diff --git a/include/cudaq/Optimizer/Dialect/CC/CCDialect.td b/include/cudaq/Optimizer/Dialect/CC/CCDialect.td index 456235e2b7d..e6b2e0d9f40 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCDialect.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCDialect.td @@ -32,7 +32,6 @@ def CCDialect : Dialect { let cppNamespace = "cudaq::cc"; let useDefaultTypePrinterParser = 1; - let useFoldAPI = kEmitFoldAdaptorFolder; let extraClassDeclaration = [{ /// Register all CC types. 
diff --git a/include/cudaq/Optimizer/Dialect/CC/CCOps.td b/include/cudaq/Optimizer/Dialect/CC/CCOps.td index f6f3e4fe711..bf7f13c8ba4 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCOps.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCOps.td @@ -332,8 +332,8 @@ def cc_LoopOp : CCOp<"loop", mlir::Block::BlockArgListType{}; } - mlir::OperandRange - getSuccessorEntryOperands(std::optional index); + mlir::OperandRange getEntrySuccessorOperands(mlir::RegionBranchPoint point); + mlir::OperandRange getEntrySuccessorOperands(mlir::RegionSuccessor point); bool hasBreakInBody(); }]; @@ -345,8 +345,8 @@ def cc_LoopOp : CCOp<"loop", def cc_IfOp : CCOp<"if", [DeclareOpInterfaceMethods, + ["getNumRegionInvocations", "getRegionInvocationBounds", + "getEntrySuccessorRegions"]>, RecursiveMemoryEffects, LinearTypeArgsTrait]> { let summary = "if-then-else operation"; let description = [{ @@ -981,7 +981,6 @@ def cc_ExtractValueOp : CCOp<"extract_value", [Pure]> { $rawConstantIndices) `]` `:` functional-type(operands, results) attr-dict }]; - let hasFolder = 1; let hasVerifier = 1; let hasCanonicalizer = 1; @@ -1081,7 +1080,6 @@ def cc_ComputePtrOp : CCOp<"compute_ptr", [Pure]> { `]` `:` functional-type(operands, results) attr-dict }]; - let hasFolder = 1; let hasCanonicalizer = 1; let hasVerifier = 1; @@ -1411,7 +1409,6 @@ def cc_CastOp : CCOp<"cast", [Pure]> { ); let results = (outs AnyType:$result); - let hasFolder = 1; let hasCanonicalizer = 1; let hasVerifier = 1; @@ -1551,7 +1548,9 @@ def cc_CallCallableOp : CCOp<"call_callable", [CallOpInterface]> { let arguments = (ins AnyCallableType:$callee, - Variadic:$args + Variadic:$args, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs ); let results = (outs Variadic:$results); let hasVerifier = 1; @@ -1560,7 +1559,17 @@ def cc_CallCallableOp : CCOp<"call_callable", [CallOpInterface]> { $callee (`,` $args^)? 
`:` functional-type(operands, results) attr-dict }]; + let builders = [ + OpBuilder<(ins "mlir::TypeRange":$result, "mlir::Value":$callee, + "mlir::ValueRange":$args), [{ + return build($_builder, $_state, result, callee, args, mlir::ArrayAttr{}, + mlir::ArrayAttr{}); + }]> + ]; + let extraClassDeclaration = [{ + static constexpr mlir::StringRef getCalleeAttrNameStr() { return "callee"; } + /// Get the argument operands to the called function. operand_range getArgOperands() { return {arg_operand_begin(), arg_operand_end()}; @@ -1569,9 +1578,18 @@ def cc_CallCallableOp : CCOp<"call_callable", [CallOpInterface]> { operand_iterator arg_operand_begin() { return ++operand_begin(); } operand_iterator arg_operand_end() { return operand_end(); } + mlir::MutableOperandRange getArgOperandsMutable() { + return getArgsMutable(); + } + /// Return the callee of this operation. mlir::CallInterfaceCallable getCallableForCallee() { return getCallee(); } + /// Set the callee for this operation. + void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + setOperand(0, mlir::cast(callee)); + } + mlir::FunctionType getFunctionType() { return mlir::FunctionType::get(getContext(), getOperands().getType(), getResults().getTypes()); @@ -1593,7 +1611,9 @@ def cc_CallIndirectCallableOp : let arguments = (ins cc_IndirectCallableType:$callee, - Variadic:$args + Variadic:$args, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs ); let results = (outs Variadic:$results); let hasVerifier = 1; @@ -1603,6 +1623,14 @@ def cc_CallIndirectCallableOp : $callee (`,` $args^)? `:` functional-type(operands, results) attr-dict }]; + let builders = [ + OpBuilder<(ins "mlir::TypeRange":$result, "mlir::Value":$callee, + "mlir::ValueRange":$args), [{ + return build($_builder, $_state, result, callee, args, mlir::ArrayAttr{}, + mlir::ArrayAttr{}); + }]> + ]; + let extraClassDeclaration = [{ /// Get the argument operands to the called function. 
operand_range getArgOperands() { @@ -1612,9 +1640,18 @@ def cc_CallIndirectCallableOp : operand_iterator arg_operand_begin() { return ++operand_begin(); } operand_iterator arg_operand_end() { return operand_end(); } + mlir::MutableOperandRange getArgOperandsMutable() { + return getArgsMutable(); + } + /// Return the callee of this operation. mlir::CallInterfaceCallable getCallableForCallee() { return getCallee(); } + /// Set the callee for this operation. + void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + setOperand(0, mlir::cast(callee)); + } + mlir::FunctionType getFunctionType() { return mlir::FunctionType::get(getContext(), getOperands().getType(), getResults().getTypes()); @@ -1789,7 +1826,9 @@ def cc_NoInlineCallOp : CCOp<"noinline_call", let arguments = (ins FlatSymbolRefAttr:$callee, - Variadic:$args + Variadic:$args, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs ); let results = (outs Variadic); @@ -1805,6 +1844,15 @@ def cc_NoInlineCallOp : CCOp<"noinline_call", operand_iterator arg_operand_begin() { return operand_begin(); } operand_iterator arg_operand_end() { return operand_end(); } + mlir::MutableOperandRange getArgOperandsMutable() { + return getArgsMutable(); + } + + /// Set the callee for this operation. + void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + setOperand(0, mlir::cast(callee)); + } + /// DO NOT RETURN the callee of this operation. This fools the inliner into /// not knowing what is actually called. 
mlir::CallInterfaceCallable getCallableForCallee() { @@ -1830,7 +1878,9 @@ def cc_DeviceCallOp : CCOp<"device_call", Variadic:$numBlocks, Variadic:$numThreadsPerBlock, Optional:$device, - Variadic:$args + Variadic:$args, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs ); let results = (outs Variadic); let assemblyFormat = [{ @@ -1845,18 +1895,20 @@ def cc_DeviceCallOp : CCOp<"device_call", OpBuilder<(ins "mlir::TypeRange":$resTys, "mlir::StringRef":$callee, "mlir::ValueRange":$values), [{ return build($_builder, $_state, resTys, callee, mlir::ValueRange{}, - mlir::ValueRange{}, mlir::Value{}, values); + mlir::ValueRange{}, mlir::Value{}, values, mlir::ArrayAttr{}, + mlir::ArrayAttr{}); }]>, OpBuilder<(ins "mlir::TypeRange":$resTys, "mlir::StringRef":$callee, "mlir::Value":$device, "mlir::ValueRange":$values), [{ return build($_builder, $_state, resTys, callee, mlir::ValueRange{}, - mlir::ValueRange{}, device, values); + mlir::ValueRange{}, device, values, mlir::ArrayAttr{}, + mlir::ArrayAttr{}); }]>, OpBuilder<(ins "mlir::TypeRange":$resTys, "mlir::StringRef":$callee, "mlir::ValueRange":$blocks, "mlir::ValueRange":$threads, "mlir::ValueRange":$values), [{ return build($_builder, $_state, resTys, callee, blocks, threads, - mlir::Value{}, values); + mlir::Value{}, values, mlir::ArrayAttr{}, mlir::ArrayAttr{}); }]> ]; @@ -1868,6 +1920,15 @@ def cc_DeviceCallOp : CCOp<"device_call", operand_iterator arg_operand_begin() { return operand_begin(); } operand_iterator arg_operand_end() { return operand_end(); } + mlir::MutableOperandRange getArgOperandsMutable() { + return getArgsMutable(); + } + + /// Set the callee for this operation. + void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + setOperand(0, mlir::cast(callee)); + } + /// Return the callee of this operation. 
mlir::CallInterfaceCallable getCallableForCallee() { return getCalleeAttr(); @@ -1912,7 +1973,9 @@ def cc_VarargCallOp : CCOp<"call_vararg", let arguments = (ins FlatSymbolRefAttr:$callee, - Variadic:$args + Variadic:$args, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs ); let results = (outs Variadic); @@ -1920,6 +1983,20 @@ def cc_VarargCallOp : CCOp<"call_vararg", $callee `(` $args `)` `:` functional-type(operands, results) attr-dict }]; + let builders = [ + OpBuilder<(ins "mlir::TypeRange":$result, "mlir::FlatSymbolRefAttr":$callee, + "mlir::ValueRange":$args), [{ + return build($_builder, $_state, result, callee, args, mlir::ArrayAttr{}, + mlir::ArrayAttr{}); + }]>, + OpBuilder<(ins "mlir::TypeRange":$result, "mlir::StringRef":$callee, + "mlir::ValueRange":$args), [{ + return build($_builder, $_state, result, + mlir::FlatSymbolRefAttr::get($_builder.getContext(), callee), args, + mlir::ArrayAttr{}, mlir::ArrayAttr{}); + }]> + ]; + let extraClassDeclaration = [{ operand_range getArgOperands() { return {arg_operand_begin(), arg_operand_end()}; @@ -1928,11 +2005,21 @@ def cc_VarargCallOp : CCOp<"call_vararg", operand_iterator arg_operand_begin() { return operand_begin(); } operand_iterator arg_operand_end() { return operand_end(); } + mlir::MutableOperandRange getArgOperandsMutable() { + return getArgsMutable(); + } + /// Return the callee of this operation. mlir::CallInterfaceCallable getCallableForCallee() { return getCalleeAttr(); } + /// Set the callee for this operation. 
+ void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + (*this)->setAttr(getCalleeAttrName(), + llvm::cast(callee)); + } + mlir::LogicalResult verifySymbolUses(mlir::SymbolTableCollection &); }]; } diff --git a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td index 15b533a6a8a..12a58032c64 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td @@ -311,21 +311,21 @@ def AnyStateInitLike : TypeConstraint; def AnyStateInitType : Type; -def IsStdvecTypePred : CPred<"$_self.isa<::cudaq::cc::StdvecType>()">; +def IsStdvecTypePred : CPred<"::mlir::isa<::cudaq::cc::StdvecType>($_self)">; class StdvecOf allowedTypes> : Type< And<[IsStdvecTypePred, Concat<"[](::mlir::Type elementType) { return ", SubstLeaves<"$_self", "elementType", AnyTypeOf.predicate>, - "; }($_self.cast<::cudaq::cc::StdvecType>().getElementType())">]>, + "; }(::mlir::cast<::cudaq::cc::StdvecType>($_self).getElementType())">]>, "stdvec of " # AnyTypeOf.summary # " values", "::cudaq::cc::StdvecType">; -def IsPointerTypePred : CPred<"$_self.isa<::cudaq::cc::PointerType>()">; +def IsPointerTypePred : CPred<"::mlir::isa<::cudaq::cc::PointerType>($_self)">; class PointerOf allowedTypes> : Type< And<[IsPointerTypePred, Concat<"[](::mlir::Type elementType) { return ", SubstLeaves<"$_self", "elementType", AnyTypeOf.predicate>, - "; }($_self.cast<::cudaq::cc::PointerType>().getElementType())">]>, + "; }(::mlir::cast<::cudaq::cc::PointerType>($_self).getElementType())">]>, "pointer of " # AnyTypeOf.summary # " values", "::cudaq::cc::PointerType">; diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeDialect.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeDialect.td index af6c0ec803e..98f24840960 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeDialect.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeDialect.td @@ -30,7 +30,6 @@ def QuakeDialect : Dialect { /// Register all Quake types. 
void registerTypes(); }]; - let useFoldAPI = kEmitFoldAdaptorFolder; } #endif // CUDAQ_OPTIMIZER_DIALECT_QUAKE_IR_QUAKE diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h index 52755a6befe..42096690e80 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h @@ -34,17 +34,18 @@ void getResetEffectsImpl( mlir::SmallVectorImpl< mlir::SideEffects::EffectInstance> &effects, - mlir::ValueRange targets); + llvm::MutableArrayRef targets); void getMeasurementEffectsImpl( mlir::SmallVectorImpl< mlir::SideEffects::EffectInstance> &effects, - mlir::ValueRange targets); + llvm::MutableArrayRef targets); void getOperatorEffectsImpl( mlir::SmallVectorImpl< mlir::SideEffects::EffectInstance> &effects, - mlir::ValueRange controls, mlir::ValueRange targets); + llvm::MutableArrayRef controls, + llvm::MutableArrayRef targets); mlir::ParseResult genericOpParse(mlir::OpAsmParser &parser, mlir::OperationState &result); diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index bfe252c497c..35c44db9ef3 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -397,7 +397,9 @@ def quake_ApplyOp : QuakeOp<"apply", Variadic:$indirect_callee, // must be 0 or 1 element UnitAttr:$is_adj, Variadic:$controls, - Variadic:$actuals + Variadic:$actuals, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs ); let results = (outs Variadic); @@ -411,7 +413,7 @@ def quake_ApplyOp : QuakeOp<"apply", "mlir::ValueRange":$controls, "mlir::ValueRange":$args), [{ return build($_builder, $_state, retTy, callee, {}, is_adj, controls, - args); + args, {}, {}); }]>, OpBuilder<(ins "mlir::TypeRange":$retTy, "mlir::SymbolRefAttr":$callee, @@ -419,7 +421,7 @@ def quake_ApplyOp : QuakeOp<"apply", "mlir::ValueRange":$controls, "mlir::ValueRange":$args), [{ return 
build($_builder, $_state, retTy, callee, {}, is_adj, controls, - args); + args, {}, {}); }]>, OpBuilder<(ins "mlir::TypeRange":$retTy, "mlir::Value":$callable, @@ -427,7 +429,7 @@ def quake_ApplyOp : QuakeOp<"apply", "mlir::ValueRange":$controls, "mlir::ValueRange":$args), [{ return build($_builder, $_state, retTy, mlir::SymbolRefAttr{}, - mlir::ValueRange{callable}, is_adj, controls, args); + mlir::ValueRange{callable}, is_adj, controls, args, {}, {}); }]>, OpBuilder<(ins "mlir::TypeRange":$retTy, "mlir::Value":$callable, @@ -435,7 +437,7 @@ def quake_ApplyOp : QuakeOp<"apply", "mlir::ValueRange":$controls, "mlir::ValueRange":$args), [{ return build($_builder, $_state, retTy, mlir::SymbolRefAttr{}, - mlir::ValueRange{callable}, is_adj, controls, args); + mlir::ValueRange{callable}, is_adj, controls, args, {}, {}); }]> ]; @@ -451,6 +453,14 @@ def quake_ApplyOp : QuakeOp<"apply", return {getActuals().begin(), getActuals().end()}; } + mlir::MutableOperandRange getArgOperandsMutable() { + auto range0 = getODSOperandIndexAndLength(0); + auto range2 = getODSOperandIndexAndLength(2); + auto mutableRange = ::mlir::MutableOperandRange(getOperation(), + range0.first, range2.second); + return mutableRange; + } + bool applyToVariant() { return getIsAdj() || !getControls().empty(); } @@ -461,6 +471,12 @@ def quake_ApplyOp : QuakeOp<"apply", return (*this)->getAttrOfType(getCalleeAttrName()); return getIndirectCallee().front(); } + + /// Set the callee for this operation. 
+ void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + (*this)->setAttr(getCalleeAttrName(), + llvm::cast(callee)); + } }]; } @@ -629,7 +645,9 @@ def quake_CallByRefOp : QuakeOp<"call_by_ref", [CallOpInterface]> { let arguments = (ins SymbolRefAttr:$callee, - Variadic:$args + Variadic:$args, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs ); let results = (outs Variadic); @@ -637,16 +655,33 @@ def quake_CallByRefOp : QuakeOp<"call_by_ref", [CallOpInterface]> { $callee `(` $args `)` `:` functional-type(operands, results) attr-dict }]; + let builders = [ + OpBuilder<(ins "mlir::SymbolRefAttr":$callee, + "mlir::TypeRange":$results, + "mlir::ValueRange":$args), [{ + return build($_builder, $_state, results, callee, args, {}, {}); + }]> + ]; + let hasVerifier = 1; let extraClassDeclaration = [{ operand_range getArgOperands() { return {operand_begin(), operand_end()}; } - + + mlir::MutableOperandRange getArgOperandsMutable() { + return getArgsMutable(); + } + mlir::CallInterfaceCallable getCallableForCallee() { return (*this)->getAttrOfType(getCalleeAttrName()); } + + void setCalleeFromCallable(mlir::CallInterfaceCallable callee) { + (*this)->setAttr(getCalleeAttrName(), + llvm::cast(callee)); + } }]; } @@ -1050,7 +1085,7 @@ def quake_ResetOp : QuakeOp<"reset", [QuantumGate, let extraClassDeclaration = [{ void getEffectsImpl(mlir::SmallVectorImpl> &effects) { - quake::getResetEffectsImpl(effects, getTargets()); + quake::getResetEffectsImpl(effects, getTargetsMutable()); } }]; } @@ -1087,7 +1122,7 @@ class Measurement : QuakeOp> &effects) { - quake::getMeasurementEffectsImpl(effects, getTargets()); + quake::getMeasurementEffectsImpl(effects, getTargetsMutable()); } }]; @@ -1252,7 +1287,7 @@ class QuakeOperator traits = [], void getEffectsImpl(mlir::SmallVectorImpl> &effects) { - quake::getOperatorEffectsImpl(effects, getControls(), getTargets()); + quake::getOperatorEffectsImpl(effects, getControlsMutable(), getTargetsMutable()); } 
//===------------------------------------------------------------------===// @@ -1415,7 +1450,7 @@ def quake_ExpPauliOp : QuakeOp<"exp_pauli", void getEffectsImpl(mlir::SmallVectorImpl> &effects) { - quake::getOperatorEffectsImpl(effects, getControls(), getTargets()); + quake::getOperatorEffectsImpl(effects, getControlsMutable(), getTargetsMutable()); } //===------------------------------------------------------------------===// diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index e9000d6421b..32ef9de4969 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -12,6 +12,11 @@ // These transforms can generally be thought of as "optimizations" or "rewrites" // on the IR. +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" @@ -47,7 +52,7 @@ void createTargetFinalizePipeline(mlir::OpPassManager &pm); /// crashes. void addDecomposition(mlir::OpPassManager &pm, mlir::ArrayRef enabledPats, - mlir::ArrayRef disabledPats = std::nullopt); + mlir::ArrayRef disabledPats = {}); void registerAOTPipelines(); void registerJITPipelines(); @@ -76,9 +81,6 @@ createQuakeSynthesizer(std::string_view, const void *, std::unique_ptr createPySynthCallableBlockArgs(const llvm::SmallVector &, bool removeBlockArg = false); -inline std::unique_ptr createPySynthCallableBlockArgs() { - return createPySynthCallableBlockArgs({}, false); -} /// Helper function to build an argument synthesis pass. 
The names of the /// functions and the substitutions text can be built as an unzipped pair of diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 910eef5142a..df5e413e68e 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -154,6 +154,7 @@ def BasisConversion : Pass<"basis-conversion", "mlir::ModuleOp"> { If no `basis` is specified or the pass cannot decompose all operations to the specified basis, the pass application will fail. }]; + let dependentDialects = ["mlir::arith::ArithDialect"]; let options = [ ListOption<"basis", "basis", "std::string", "Set of basis operations">, ListOption<"disabledPatterns", "disable-patterns", "std::string", @@ -317,6 +318,7 @@ def Decomposition : Pass<"decomposition", "mlir::ModuleOp"> { means no decomposition will take place under the presence of controlled `quake.apply` operations in the module. }]; + let dependentDialects = ["mlir::arith::ArithDialect"]; let options = [ ListOption<"basis", "basis", "std::string", "Set of basis operations">, ListOption<"disabledPatterns", "disable-patterns", "std::string", @@ -543,7 +545,9 @@ def GenerateKernelExecution : Pass<"kernel-execution", "mlir::ModuleOp"> { constants) regardless of the kernel's (semantically correct) signature. }]; - let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect"]; + let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect", + "mlir::func::FuncDialect", "mlir::cf::ControlFlowDialect", + "mlir::arith::ArithDialect"]; let options = [ Option<"outputFilename", "output-filename", "std::string", @@ -656,6 +660,9 @@ def LambdaLifting : Pass<"lambda-lifting", "mlir::ModuleOp"> { before the loop is analyzed. 
}]; + let dependentDialects = ["mlir::cf::ControlFlowDialect", + "mlir::func::FuncDialect"]; + let options = [ Option<"constantPropagation", "constant-prop", "bool", /*default=*/"false", "Enable specialization and constant propagation into lifted lambdas."> @@ -1060,6 +1067,10 @@ def QuakeSynthesize : Pass<"quake-synth", "mlir::ModuleOp"> { runtime values. }]; + let dependentDialects = ["mlir::arith::ArithDialect", + "mlir::complex::ComplexDialect", + "cudaq::cc::CCDialect", "mlir::math::MathDialect"]; + let constructor = "cudaq::opt::createQuakeSynthesizer()"; } @@ -1381,6 +1392,7 @@ def UnitarySynthesis : Pass<"unitary-synthesis", "mlir::ModuleOp"> { } ``` }]; + let dependentDialects = ["mlir::arith::ArithDialect", "mlir::func::FuncDialect"]; } def UnwindLowering : Pass<"unwind-lowering", "mlir::func::FuncOp"> { @@ -1406,6 +1418,9 @@ def UnwindLowering : Pass<"unwind-lowering", "mlir::func::FuncOp"> { The lower to CFG pass removed all structured operations from a function, lowering the body of the function completely to a primitive CFG. }]; + + let dependentDialects = ["mlir::cf::ControlFlowDialect", + "quake::QuakeDialect", "cudaq::cc::CCDialect"]; } def UpdateRegisterNames : Pass<"update-register-names"> { diff --git a/lib/Frontend/nvqpp/ASTBridge.cpp b/lib/Frontend/nvqpp/ASTBridge.cpp index 724b13e16e7..e6b5b46a64f 100644 --- a/lib/Frontend/nvqpp/ASTBridge.cpp +++ b/lib/Frontend/nvqpp/ASTBridge.cpp @@ -91,12 +91,6 @@ trimmedMangledTypeName(clang::QualType ty, return s; } -static std::string -trimmedMangledTypeName(const clang::Type *ty, - clang::ItaniumMangleContext *mangler) { - return trimmedMangledTypeName(clang::QualType(ty, /*Quals=*/0), mangler); -} - std::string cudaq::details::getTagNameOfFunctionDecl(const clang::FunctionDecl *func, clang::ItaniumMangleContext *mangler) { @@ -108,8 +102,10 @@ cudaq::details::getTagNameOfFunctionDecl(const clang::FunctionDecl *func, // template T operator()(args...) { ... 
} // }; // cudaq::get_class_kernel_name(); - auto name = "instance_" + - trimmedMangledTypeName(cxxCls->getTypeForDecl(), mangler); + auto name = + "instance_" + + trimmedMangledTypeName( + mangler->getASTContext().getCanonicalTagType(cxxCls), mangler); assert(cxxMethod->getTemplateSpecializationArgs()); for (auto &templArg : cxxMethod->getTemplateSpecializationArgs()->asArray()) @@ -120,7 +116,8 @@ cudaq::details::getTagNameOfFunctionDecl(const clang::FunctionDecl *func, } // Member function, but not a template function. // cudaq::get_class_kernel_name(); - auto name = trimmedMangledTypeName(cxxCls->getTypeForDecl(), mangler); + auto name = trimmedMangledTypeName( + mangler->getASTContext().getCanonicalTagType(cxxCls), mangler); LLVM_DEBUG(llvm::dbgs() << "member name is: " << name << '\n'); return name; } @@ -324,9 +321,8 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor { bool VisitVarDecl(clang::VarDecl *x) { if (isTupleReverseVar(x)) { - auto loc = x->getLocation(); - auto opt = x->getAnyInitializer()->getIntegerConstantExpr( - x->getASTContext(), &loc, false); + auto opt = + x->getAnyInitializer()->getIntegerConstantExpr(x->getASTContext()); if (opt) { LLVM_DEBUG(llvm::dbgs() << "tuples are reversed: " << *opt << '\n'); tuplesAreReversed = !opt->isZero(); @@ -335,9 +331,8 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor { if (cudaq::isInNamespace(x, "cudaq") && cudaq::isInNamespace(x, "details") && x->getName() == "_nvqpp_sizeof") { // This constexpr is the sizeof a pauli_word and a std::string. 
- auto loc = x->getLocation(); - auto opt = x->getAnyInitializer()->getIntegerConstantExpr( - x->getASTContext(), &loc, false); + auto opt = + x->getAnyInitializer()->getIntegerConstantExpr(x->getASTContext()); assert(opt && "must compute the sizeof a cudaq::pauli_word"); auto sizeofString = opt->getZExtValue(); auto sizeAttr = module->getAttr(cudaq::runtime::sizeofStringAttrName); @@ -359,8 +354,8 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor { if (auto *id = decl->getIdentifier()) { auto name = id->getName(); if (name == "qubit" || name == "qudit" || name == "qspan" || - name.startswith("qreg") || name.startswith("qvector") || - name.startswith("qarray") || name.startswith("qview")) + name.starts_with("qreg") || name.starts_with("qvector") || + name.starts_with("qarray") || name.starts_with("qview")) cudaq::details::reportClangError( x, mangler, "may not use quantum types in non-kernel functions"); @@ -511,8 +506,8 @@ void ASTBridgeAction::ASTBridgeConsumer::addFunctionDecl( isa(funcDecl) && !funcDecl->isStatic(); FunctionType hostFuncTy = opt::factory::toHostSideFuncType(funcTy, addThisPtr, *module); - auto func = build.create(loc, funcName, hostFuncTy, - ArrayRef{}); + auto func = func::FuncOp::create(build, loc, funcName, hostFuncTy, + ArrayRef{}); if (!addThisPtr) func->setAttr("no_this", build.getUnitAttr()); @@ -527,8 +522,8 @@ void ASTBridgeAction::ASTBridgeConsumer::addFunctionDecl( build.setInsertionPointToStart(block); SmallVector results; for (auto resTy : hostFuncTy.getResults()) - results.push_back(build.create(loc, resTy)); - build.create(loc, results); + results.push_back(cc::UndefOp::create(build, loc, resTy)); + func::ReturnOp::create(build, loc, results); } // Walk the arguments and add byval attributes where needed. 
@@ -710,7 +705,7 @@ std::string getCxxMangledTypeName(clang::QualType ty, clang::ItaniumMangleContext *mangler) { std::string s; llvm::raw_string_ostream os(s); - mangler->mangleTypeName(ty, os); + mangler->mangleCanonicalTypeName(ty, os); os.flush(); LLVM_DEBUG(llvm::dbgs() << "type name mangled as '" << s << "'\n"); return s; diff --git a/lib/Frontend/nvqpp/ConvertDecl.cpp b/lib/Frontend/nvqpp/ConvertDecl.cpp index fd01e4b5ec4..d546229d2c4 100644 --- a/lib/Frontend/nvqpp/ConvertDecl.cpp +++ b/lib/Frontend/nvqpp/ConvertDecl.cpp @@ -97,9 +97,9 @@ void QuakeBridgeVisitor::addArgumentSymbols( quake::VeqType, quake::WireType>(parmTy)) { symbolTable.insert(name, entryBlock->getArgument(index)); } else { - auto stackSlot = builder.create(loc, parmTy); - builder.create(loc, entryBlock->getArgument(index), - stackSlot); + auto stackSlot = cc::AllocaOp::create(builder, loc, parmTy); + cc::StoreOp::create(builder, loc, entryBlock->getArgument(index), + stackSlot); symbolTable.insert(name, stackSlot); } } @@ -447,8 +447,10 @@ bool QuakeBridgeVisitor::TraverseFunctionDecl(clang::FunctionDecl *x) { skipCompoundScope = true; // Visit the trailing requires clause, if any. 
- if (auto *trailingRequiresClause = x->getTrailingRequiresClause()) - if (!TraverseStmt(trailingRequiresClause)) + if (const auto &trailingRequiresClause = x->getTrailingRequiresClause(); + trailingRequiresClause.ConstraintExpr) + if (!TraverseStmt( + const_cast(trailingRequiresClause.ConstraintExpr))) return false; if (auto *ctor = dyn_cast(x)) { @@ -499,8 +501,8 @@ bool QuakeBridgeVisitor::TraverseFunctionDecl(clang::FunctionDecl *x) { auto loc = toLocation(x); SmallVector dummyResults; for (auto ty : funcTy.getResults()) - dummyResults.push_back(builder.create(loc, ty)); - builder.create(loc, dummyResults); + dummyResults.push_back(cc::UndefOp::create(builder, loc, ty)); + func::ReturnOp::create(builder, loc, dummyResults); } builder.clearInsertionPoint(); return true; @@ -516,7 +518,7 @@ bool QuakeBridgeVisitor::VisitCXXScalarValueInitExpr( if (ptrTy.getElementType() == ty) { auto v = popValue(); auto loc = toLocation(x); - return pushValue(builder.create(loc, v)); + return pushValue(cc::LoadOp::create(builder, loc, v)); } return true; } @@ -558,13 +560,13 @@ bool QuakeBridgeVisitor::VisitFunctionDecl(clang::FunctionDecl *x) { return false; } } - return pushValue(builder.create(loc, fTy, fSym)); + return pushValue(func::ConstantOp::create(builder, loc, fTy, fSym)); } auto [funcOp, alreadyAdded] = getOrAddFunc(loc, kernName, typeFromStack); if (!alreadyAdded) funcOp.setPrivate(); - return pushValue(builder.create( - loc, funcOp.getFunctionType(), funcOp.getSymNameAttr())); + return pushValue(func::ConstantOp::create( + builder, loc, funcOp.getFunctionType(), funcOp.getSymNameAttr())); } bool QuakeBridgeVisitor::VisitNamedDecl(clang::NamedDecl *x) { @@ -692,12 +694,12 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { qreg = popValue(); } else { // this is a qreg q; - auto qregSizeVal = builder.create( - loc, qregSize, builder.getIntegerType(64)); + auto qregSizeVal = mlir::arith::ConstantIntOp::create( + builder, loc, builder.getIntegerType(64), 
qregSize); if (qregSize != 0) - qreg = builder.create(loc, qType); + qreg = quake::AllocaOp::create(builder, loc, qType); else - qreg = builder.create(loc, qType, qregSizeVal); + qreg = quake::AllocaOp::create(builder, loc, qType, qregSizeVal); } symbolTable.insert(name, qreg); // allocated_qreg_names.push_back(name); @@ -710,12 +712,12 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { symbolTable.insert(name, peekValue()); return true; } - auto zero = builder.create( - loc, 0, builder.getIntegerType(64)); - auto qregSizeOne = builder.create( - loc, quake::VeqType::get(builder.getContext(), 1)); + auto zero = mlir::arith::ConstantIntOp::create( + builder, loc, builder.getIntegerType(64), 0); + auto qregSizeOne = quake::AllocaOp::create( + builder, loc, quake::VeqType::get(builder.getContext(), 1)); Value addressTheQubit = - builder.create(loc, qregSizeOne, zero); + quake::ExtractRefOp::create(builder, loc, qregSizeOne, zero); symbolTable.insert(name, addressTheQubit); return pushValue(addressTheQubit); } @@ -807,7 +809,7 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { // slot in which to save the value. This stack slot is the variable in the // memory domain. if (!x->getInit() || x->isCXXForRangeDecl()) { - Value alloca = builder.create(loc, type); + Value alloca = cc::AllocaOp::create(builder, loc, type); symbolTable.insert(x->getName(), alloca); return pushValue(alloca); } @@ -826,16 +828,16 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { if (initValue.getType().getIntOrFloatBitWidth() < type.getIntOrFloatBitWidth()) { // FIXME: Use zero-extend if this is unsigned! 
- initValue = builder.create( - loc, type, initValue, cudaq::cc::CastOpMode::Signed); + initValue = cudaq::cc::CastOp::create(builder, loc, type, initValue, + cudaq::cc::CastOpMode::Signed); } else if (initValue.getType().getIntOrFloatBitWidth() > type.getIntOrFloatBitWidth()) { - initValue = builder.create(loc, type, initValue); + initValue = cudaq::cc::CastOp::create(builder, loc, type, initValue); } } else if (isa(initValue.getType()) && isa(type)) { // FIXME: Use UIToFP if this is unsigned! - initValue = builder.create( - loc, type, initValue, cudaq::cc::CastOpMode::Signed); + initValue = cudaq::cc::CastOp::create(builder, loc, type, initValue, + cudaq::cc::CastOpMode::Signed); } if (auto initObject = initValue.getDefiningOp()) { @@ -861,7 +863,7 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { if (isStdvecBoolReference(qualTy) || qualTy.getTypePtr()->isReferenceType()) { // A similar case is when the C++ variable is a reference to a subobject. assert(isa(type)); - Value cast = builder.create(loc, type, initValue); + Value cast = cc::CastOp::create(builder, loc, type, initValue); symbolTable.insert(x->getName(), cast); return pushValue(cast); } @@ -874,8 +876,8 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { // Initialization expression resulted in a value. Create a variable and save // that value to the variable's memory address. 
- Value alloca = builder.create(loc, type); - builder.create(loc, initValue, alloca); + Value alloca = cc::AllocaOp::create(builder, loc, type); + cc::StoreOp::create(builder, loc, initValue, alloca); symbolTable.insert(x->getName(), alloca); return pushValue(alloca); } diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp index 70aaf25f990..47bc24e0cad 100644 --- a/lib/Frontend/nvqpp/ConvertExpr.cpp +++ b/lib/Frontend/nvqpp/ConvertExpr.cpp @@ -13,6 +13,8 @@ #include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "llvm/Support/Debug.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Math/IR/Math.h" #define DEBUG_TYPE "lower-ast-expr" @@ -74,25 +76,27 @@ maybeUnpackOperands(OpBuilder &builder, Location loc, ValueRange operands, if (isa(last_target.getType())) { // Split the vector. Last `targetCount` are targets, front `N-targetCount` // are controls. - auto vecSize = builder.create( - loc, builder.getIntegerType(64), targets); - auto size = builder.create( - loc, builder.getI64Type(), vecSize, cudaq::cc::CastOpMode::Unsigned); - - auto numTargets = - builder.create(loc, targetCount, 64); - auto offset = builder.create(loc, size, numTargets); - auto zero = builder.create(loc, 0, 64); - auto last = builder.create(loc, offset, numTargets); + auto vecSize = quake::VeqSizeOp::create( + builder, loc, builder.getIntegerType(64), targets); + auto size = + cudaq::cc::CastOp::create(builder, loc, builder.getI64Type(), vecSize, + cudaq::cc::CastOpMode::Unsigned); + + auto numTargets = arith::ConstantIntOp::create( + builder, loc, builder.getI64Type(), targetCount); + auto offset = arith::SubIOp::create(builder, loc, size, numTargets); + auto zero = + arith::ConstantIntOp::create(builder, loc, builder.getI64Type(), 0); + auto last = arith::SubIOp::create(builder, loc, offset, numTargets); // The canonicalizer will compute a constant size, if possible. 
auto unsizedVeqTy = quake::VeqType::getUnsized(builder.getContext()); // Get the subvector of all targets - Value targetSubveq = builder.create( - loc, unsizedVeqTy, last_target, zero, offset); + Value targetSubveq = quake::SubVeqOp::create(builder, loc, unsizedVeqTy, + last_target, zero, offset); // Get the subvector of all qubits excluding the last one: controls. - Value ctrlSubveq = builder.create(loc, unsizedVeqTy, - last_target, zero, last); + Value ctrlSubveq = quake::SubVeqOp::create(builder, loc, unsizedVeqTy, + last_target, zero, last); return std::make_pair(SmallVector{targetSubveq}, SmallVector{ctrlSubveq}); } @@ -155,11 +159,11 @@ bool buildOp(OpBuilder &builder, Location loc, ValueRange operands, negatedControlsAttribute(builder.getContext(), ctrls, negations); if (ctrls.empty()) for (auto t : target) - builder.create(loc, isAdjoint, params, ctrls, t, negs); + A::create(builder, loc, isAdjoint, params, ctrls, t, negs); else { assert(target.size() == 1 && "can only have a single target with control qubits."); - builder.create(loc, isAdjoint, params, ctrls, target, negs); + A::create(builder, loc, isAdjoint, params, ctrls, target, negs); } } else { assert(operands.size() >= 1 && "must be at least 1 operand"); @@ -168,15 +172,15 @@ bool buildOp(OpBuilder &builder, Location loc, ValueRange operands, if (!negations.empty()) reportNegateError(); Type i64Ty = builder.getI64Type(); - auto size = builder.create( - loc, builder.getIntegerType(64), target); - Value rank = builder.create( - loc, i64Ty, size, cudaq::cc::CastOpMode::Unsigned); + auto size = quake::VeqSizeOp::create(builder, loc, + builder.getIntegerType(64), target); + Value rank = cudaq::cc::CastOp::create(builder, loc, i64Ty, size, + cudaq::cc::CastOpMode::Unsigned); auto bodyBuilder = [&](OpBuilder &builder, Location loc, Region &, Block &block) { - Value ref = builder.create(loc, target, - block.getArgument(0)); - builder.create(loc, ValueRange(), ref); + Value ref = 
quake::ExtractRefOp::create(builder, loc, target, + block.getArgument(0)); + A::create(builder, loc, ValueRange(), ref); }; cudaq::opt::factory::createInvariantLoop(builder, loc, rank, bodyBuilder); } else { @@ -190,12 +194,12 @@ bool buildOp(OpBuilder &builder, Location loc, ValueRange operands, if (ctrls.empty()) // May have multiple targets, but no controls, op(q, r, s, ...) for (auto t : target) - builder.create(loc, isAdjoint, ValueRange(), ValueRange(), t, - negs); + A::create(builder, loc, isAdjoint, ValueRange(), ValueRange(), t, + negs); else { assert(target.size() == 1 && "can only have a single target with control qubits."); - builder.create(loc, isAdjoint, ValueRange(), ctrls, target, negs); + A::create(builder, loc, isAdjoint, ValueRange(), ctrls, target, negs); } } } @@ -204,14 +208,14 @@ bool buildOp(OpBuilder &builder, Location loc, ValueRange operands, static Value getConstantInt(OpBuilder &builder, Location loc, const uint64_t value, const int bitwidth) { - return builder.create(loc, value, - builder.getIntegerType(bitwidth)); + return arith::ConstantIntOp::create(builder, loc, + builder.getIntegerType(bitwidth), value); } static Value getConstantInt(OpBuilder &builder, Location loc, const uint64_t value, Type intTy) { assert(isa(intTy)); - return builder.create(loc, value, intTy); + return arith::ConstantIntOp::create(builder, loc, intTy, value); } template isUnsignedIntegerOrEnumerationType()) ? cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed; - lhs = builder.create(loc, rhs.getType(), lhs, mode); + lhs = cudaq::cc::CastOp::create(builder, loc, rhs.getType(), lhs, mode); return; } auto mode = (rhsType && rhsType->isUnsignedIntegerOrEnumerationType()) ? 
cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed; - rhs = builder.create(loc, lhs.getType(), rhs, mode); + rhs = cudaq::cc::CastOp::create(builder, loc, lhs.getType(), rhs, mode); return; } if (isa(lhsTy) && isa(rhsTy)) { if (lhsTy.getIntOrFloatBitWidth() < rhsTy.getIntOrFloatBitWidth()) { - lhs = builder.create(loc, rhs.getType(), lhs); + lhs = cudaq::cc::CastOp::create(builder, loc, rhs.getType(), lhs); return; } - rhs = builder.create(loc, lhs.getType(), rhs); + rhs = cudaq::cc::CastOp::create(builder, loc, lhs.getType(), rhs); return; } if (isa(lhsTy) && isa(rhsTy)) { auto mode = (rhsType && rhsType->isUnsignedIntegerOrEnumerationType()) ? cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed; - rhs = builder.create(loc, lhs.getType(), rhs, mode); + rhs = cudaq::cc::CastOp::create(builder, loc, lhs.getType(), rhs, mode); return; } if (isa(lhsTy) && isa(rhsTy)) { auto mode = (lhsType && lhsType->isUnsignedIntegerOrEnumerationType()) ? cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed; - lhs = builder.create(loc, rhs.getType(), lhs, mode); + lhs = cudaq::cc::CastOp::create(builder, loc, rhs.getType(), lhs, mode); return; } TODO_loc(loc, "conversion of operands in binary expression"); @@ -346,7 +350,7 @@ bool QuakeBridgeVisitor::VisitArraySubscriptExpr(clang::ArraySubscriptExpr *x) { return eleTy; }(); auto elePtrTy = cc::PointerType::get(arrEleTy); - return pushValue(builder.create(loc, elePtrTy, lhs, rhs)); + return pushValue(cc::ComputePtrOp::create(builder, loc, elePtrTy, lhs, rhs)); } bool QuakeBridgeVisitor::VisitFloatingLiteral(clang::FloatingLiteral *x) { @@ -380,7 +384,7 @@ bool QuakeBridgeVisitor::VisitCharacterLiteral(clang::CharacterLiteral *x) { auto intTy = builtinTypeToType(cast(x->getType().getTypePtr())); auto intVal = x->getValue(); - return pushValue(builder.create(loc, intVal, intTy)); + return pushValue(arith::ConstantIntOp::create(builder, loc, intTy, intVal)); } bool 
QuakeBridgeVisitor::VisitUnaryOperator(clang::UnaryOperator *x) { @@ -388,63 +392,63 @@ bool QuakeBridgeVisitor::VisitUnaryOperator(clang::UnaryOperator *x) { switch (x->getOpcode()) { case clang::UnaryOperatorKind::UO_PostInc: { auto var = popValue(); - auto loaded = builder.create(loc, var); - auto incremented = builder.create( - loc, loaded, + auto loaded = cc::LoadOp::create(builder, loc, var); + auto incremented = arith::AddIOp::create( + builder, loc, loaded, getConstantInt(builder, loc, 1, loaded.getType().getIntOrFloatBitWidth())); - builder.create(loc, incremented, var); + cc::StoreOp::create(builder, loc, incremented, var); return pushValue(loaded); } case clang::UnaryOperatorKind::UO_PreInc: { auto var = popValue(); - auto loaded = builder.create(loc, var); - auto incremented = builder.create( - loc, loaded, + auto loaded = cc::LoadOp::create(builder, loc, var); + auto incremented = arith::AddIOp::create( + builder, loc, loaded, getConstantInt(builder, loc, 1, loaded.getType().getIntOrFloatBitWidth())); - builder.create(loc, incremented, var); + cc::StoreOp::create(builder, loc, incremented, var); return pushValue(incremented); } case clang::UnaryOperatorKind::UO_PostDec: { auto var = popValue(); - auto loaded = builder.create(loc, var); - auto decremented = builder.create( - loc, loaded, + auto loaded = cc::LoadOp::create(builder, loc, var); + auto decremented = arith::SubIOp::create( + builder, loc, loaded, getConstantInt(builder, loc, 1, loaded.getType().getIntOrFloatBitWidth())); - builder.create(loc, decremented, var); + cc::StoreOp::create(builder, loc, decremented, var); return pushValue(loaded); } case clang::UnaryOperatorKind::UO_PreDec: { auto var = popValue(); - auto loaded = builder.create(loc, var); - auto decremented = builder.create( - loc, loaded, + auto loaded = cc::LoadOp::create(builder, loc, var); + auto decremented = arith::SubIOp::create( + builder, loc, loaded, getConstantInt(builder, loc, 1, 
loaded.getType().getIntOrFloatBitWidth())); - builder.create(loc, decremented, var); + cc::StoreOp::create(builder, loc, decremented, var); return pushValue(decremented); } case clang::UnaryOperatorKind::UO_LNot: { auto var = popValue(); - auto zero = builder.create(loc, 0, var.getType()); - Value unaryNot = - builder.create(loc, arith::CmpIPredicate::eq, var, zero); + auto zero = arith::ConstantIntOp::create(builder, loc, var.getType(), 0); + Value unaryNot = arith::CmpIOp::create(builder, loc, + arith::CmpIPredicate::eq, var, zero); return pushValue(unaryNot); } case clang::UnaryOperatorKind::UO_Minus: { auto subExpr = popValue(); auto resTy = subExpr.getType(); if (isa(resTy)) - return pushValue(builder.create( - loc, subExpr, + return pushValue(arith::MulIOp::create( + builder, loc, subExpr, getConstantInt(builder, loc, -1, resTy.getIntOrFloatBitWidth()))); if (isa(resTy)) { auto neg_one = opt::factory::createFloatConstant(loc, builder, -1.0, cast(resTy)); - return pushValue(builder.create(loc, subExpr, neg_one)); + return pushValue(arith::MulFOp::create(builder, loc, subExpr, neg_one)); } TODO_x(loc, x, mangler, "unknown type for unary minus"); return false; @@ -452,7 +456,7 @@ bool QuakeBridgeVisitor::VisitUnaryOperator(clang::UnaryOperator *x) { case clang::UnaryOperatorKind::UO_Deref: { auto subExpr = popValue(); assert(isa(subExpr.getType())); - return pushValue(builder.create(loc, subExpr)); + return pushValue(cc::LoadOp::create(builder, loc, subExpr)); } case clang::UnaryOperatorKind::UO_AddrOf: { auto subExpr = peekValue(); @@ -478,7 +482,7 @@ Value QuakeBridgeVisitor::floatingPointCoercion(Location loc, Type toType, if (toType == fromType) return value; assert(isa(fromType) && isa(toType)); - return builder.create(loc, toType, value); + return cudaq::cc::CastOp::create(builder, loc, toType, value); } Value QuakeBridgeVisitor::integerCoercion(Location loc, @@ -493,10 +497,10 @@ Value QuakeBridgeVisitor::integerCoercion(Location loc, auto mode = 
(clangTy->isUnsignedIntegerOrEnumerationType()) ? cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed; - return builder.create(loc, dstTy, srcVal, mode); + return cudaq::cc::CastOp::create(builder, loc, dstTy, srcVal, mode); } assert(fromTy.getIntOrFloatBitWidth() > dstTy.getIntOrFloatBitWidth()); - return builder.create(loc, dstTy, srcVal); + return cudaq::cc::CastOp::create(builder, loc, dstTy, srcVal); } /// Generalized kernel argument morphing. When traversing the AST, the calling @@ -520,17 +524,17 @@ SmallVector QuakeBridgeVisitor::convertKernelArgs( auto eleTy = ptrTy.getElementType(); if (eleTy == kTy) { // Promote pass-by-reference to pass-by-value. - auto load = builder.create(loc, v); + auto load = cudaq::cc::LoadOp::create(builder, loc, v); result.push_back(load); continue; } // We've passed clang++'s semantics checks but the types are distinct. if (isa(kTy)) { - result.push_back(builder.create(loc, kTy, v)); + result.push_back(cudaq::cc::CastOp::create(builder, loc, kTy, v)); continue; } - auto load = builder.create(loc, v); + auto load = cudaq::cc::LoadOp::create(builder, loc, v); auto loadTy = load.getType(); Value castTo; if (isa(loadTy) && isa(kTy)) { @@ -550,7 +554,7 @@ SmallVector QuakeBridgeVisitor::convertKernelArgs( // Both are Veq but the Veq are not identical. If the callee has a // dynamic size, we can relax the size from the calling context. if (vVecTy.hasSpecifiedSize() && !kVecTy.hasSpecifiedSize()) { - auto relax = builder.create(loc, kVecTy, v); + auto relax = quake::RelaxSizeOp::create(builder, loc, kVecTy, v); result.push_back(relax); continue; } @@ -590,7 +594,7 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { clang::QualType srcTy = x->getSubExpr()->getType(); // Check for and handle reference to integer cases. 
if (isa(mlirVal.getType())) - mlirVal = builder.create(loc, mlirVal); + mlirVal = cudaq::cc::LoadOp::create(builder, loc, mlirVal); return pushValue(integerCoercion(locSub, srcTy, castToTy, mlirVal)); }; @@ -601,7 +605,7 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { } case clang::CastKind::CK_BitCast: { auto value = popValue(); - return pushValue(builder.create(loc, castToTy, value)); + return pushValue(cudaq::cc::CastOp::create(builder, loc, castToTy, value)); } case clang::CastKind::CK_FloatingCast: { [[maybe_unused]] auto dstType = x->getType(); @@ -613,7 +617,7 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { assert(toType && fromType); if (toType == fromType) return pushValue(value); - return pushValue(builder.create(loc, toType, value)); + return pushValue(cudaq::cc::CastOp::create(builder, loc, toType, value)); } case clang::CastKind::CK_IntegralCast: { auto locSub = toLocation(x->getSubExpr()); @@ -633,7 +637,7 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { ? cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed; return pushValue( - builder.create(loc, castToTy, popValue(), mode)); + cudaq::cc::CastOp::create(builder, loc, castToTy, popValue(), mode)); } case clang::CastKind::CK_IntegralToFloating: { auto mode = @@ -641,20 +645,20 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { ? 
cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed; return pushValue( - builder.create(loc, castToTy, popValue(), mode)); + cudaq::cc::CastOp::create(builder, loc, castToTy, popValue(), mode)); } case clang::CastKind::CK_IntegralToBoolean: { auto last = popValue(); - Value zero = builder.create(loc, 0, last.getType()); - return pushValue(builder.create( - loc, arith::CmpIPredicate::ne, last, zero)); + Value zero = arith::ConstantIntOp::create(builder, loc, last.getType(), 0); + return pushValue(arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::ne, last, zero)); } case clang::CastKind::CK_FloatingToBoolean: { auto last = popValue(); Value zero = opt::factory::createFloatConstant( loc, builder, 0.0, cast(last.getType())); - return pushValue(builder.create( - loc, arith::CmpFPredicate::UNE, last, zero)); + return pushValue(arith::CmpFOp::create( + builder, loc, arith::CmpFPredicate::UNE, last, zero)); } case clang::CastKind::CK_UserDefinedConversion: { auto sub = popValue(); @@ -695,7 +699,7 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { if (isa(castToTy)) if (auto ptrTy = dyn_cast(peekValue().getType())) if (isa(ptrTy.getElementType())) - return pushValue(builder.create(loc, popValue())); + return pushValue(cudaq::cc::LoadOp::create(builder, loc, popValue())); if (auto funcTy = peelPointerFromFunction(castToTy)) if (auto fromTy = dyn_cast(peekValue().getType())) { auto inputs = funcTy.getInputs(); @@ -715,9 +719,10 @@ bool QuakeBridgeVisitor::VisitCastExpr(clang::CastExpr *x) { return false; if (x->getCastKind() == clang::CastKind::CK_IntegralToBoolean) { auto last = popValue(); - Value zero = builder.create(loc, 0, last.getType()); - return pushValue(builder.create( - loc, arith::CmpIPredicate::ne, last, zero)); + Value zero = + arith::ConstantIntOp::create(builder, loc, last.getType(), 0); + return pushValue(arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::ne, last, zero)); } } TODO_loc(loc, "unhandled implicit 
cast expression"); @@ -739,15 +744,15 @@ bool QuakeBridgeVisitor::TraverseBinaryOperator(clang::BinaryOperator *x, return false; auto lhsVal = popValue(); auto loc = toLocation(x->getSourceRange()); - auto zero = builder.create(loc, 0, lhsVal.getType()); - Value cond = builder.create(loc, - shortCircuitWhenTrue - ? arith::CmpIPredicate::ne + auto zero = arith::ConstantIntOp::create(builder, loc, lhsVal.getType(), 0); + Value cond = + arith::CmpIOp::create(builder, loc, + shortCircuitWhenTrue ? arith::CmpIPredicate::ne : arith::CmpIPredicate::eq, - lhsVal, zero); + lhsVal, zero); bool result = true; - auto ifOp = builder.create( - loc, TypeRange{cond.getType()}, cond, + auto ifOp = cc::IfOp::create( + builder, loc, TypeRange{cond.getType()}, cond, // Value if `cond` is true // For `BO_LAnd`, that means Value if lhs is zero (i.e. false) // For `BO_LOr`, that means Value if lhs is non-zero (i.e. true) @@ -760,13 +765,12 @@ bool QuakeBridgeVisitor::TraverseBinaryOperator(clang::BinaryOperator *x, builder.setInsertionPointToStart(&bodyBlock); if (x->getOpcode() == clang::BinaryOperatorKind::BO_LAnd) { // Return false out of this block in order to avoid evaluating rhs - auto constantFalse = - builder - .create(loc, builder.getBoolAttr(false)) - .getResult(); - builder.create(loc, TypeRange{}, constantFalse); + auto constantFalse = arith::ConstantOp::create( + builder, loc, builder.getBoolAttr(false)) + .getResult(); + cc::ContinueOp::create(builder, loc, TypeRange{}, constantFalse); } else { - builder.create(loc, TypeRange{}, cond); + cc::ContinueOp::create(builder, loc, TypeRange{}, cond); } }, // Value if `cond` is false @@ -784,7 +788,7 @@ bool QuakeBridgeVisitor::TraverseBinaryOperator(clang::BinaryOperator *x, return; } auto rhsVal = popValue(); - builder.create(loc, TypeRange{}, rhsVal); + cc::ContinueOp::create(builder, loc, TypeRange{}, rhsVal); }); if (!result) return result; @@ -802,7 +806,7 @@ bool QuakeBridgeVisitor::VisitBinaryOperator(clang::BinaryOperator 
*x) { auto loc = toLocation(x->getSourceRange()); auto maybeLoadValue = [&](Value v) -> Value { if (isa(v.getType())) - return builder.create(loc, v); + return cc::LoadOp::create(builder, loc, v); return v; }; @@ -836,7 +840,7 @@ bool QuakeBridgeVisitor::VisitBinaryOperator(clang::BinaryOperator *x) { default: TODO_loc(loc, "floating-point comparison"); } - return pushValue(builder.create(loc, pred, lhs, rhs)); + return pushValue(arith::CmpFOp::create(builder, loc, pred, lhs, rhs)); } arith::CmpIPredicate pred; auto lhsTy = x->getLHS()->getType(); @@ -866,12 +870,12 @@ bool QuakeBridgeVisitor::VisitBinaryOperator(clang::BinaryOperator *x) { default: TODO_loc(loc, "integer comparison"); } - return pushValue(builder.create(loc, pred, lhs, rhs)); + return pushValue(arith::CmpIOp::create(builder, loc, pred, lhs, rhs)); } switch (x->getOpcode()) { case clang::BinaryOperatorKind::BO_Assign: { - builder.create(loc, rhs, lhs); + cc::StoreOp::create(builder, loc, rhs, lhs); return pushValue(lhs); } case clang::BinaryOperatorKind::BO_AddAssign: @@ -894,60 +898,60 @@ bool QuakeBridgeVisitor::VisitBinaryOperator(clang::BinaryOperator *x) { switch (x->getOpcode()) { case clang::BinaryOperatorKind::BO_Add: { if (x->getType()->isIntegerType()) - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::AddIOp::create(builder, loc, lhs, rhs)); if (x->getType()->isFloatingType()) - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::AddFOp::create(builder, loc, lhs, rhs)); TODO_loc(loc, "error in bo_add binary op"); } case clang::BinaryOperatorKind::BO_Rem: { if (x->getType()->isIntegerType()) { if (x->getType()->isUnsignedIntegerOrEnumerationType()) - return pushValue(builder.create(loc, lhs, rhs)); - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::RemUIOp::create(builder, loc, lhs, rhs)); + return pushValue(arith::RemSIOp::create(builder, loc, lhs, rhs)); } if (x->getType()->isFloatingType()) - return 
pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::AddFOp::create(builder, loc, lhs, rhs)); TODO_loc(loc, "error in bo_add binary op"); } case clang::BinaryOperatorKind::BO_Sub: { if (x->getType()->isIntegerType()) - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::SubIOp::create(builder, loc, lhs, rhs)); if (x->getType()->isFloatingType()) - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::SubFOp::create(builder, loc, lhs, rhs)); TODO_loc(loc, "error in bo_add binary op"); } case clang::BinaryOperatorKind::BO_Mul: { if (x->getType()->isIntegerType()) - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::MulIOp::create(builder, loc, lhs, rhs)); if (x->getType()->isFloatingType()) - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::MulFOp::create(builder, loc, lhs, rhs)); TODO_loc(loc, "error in bo_mul binary op"); } case clang::BinaryOperatorKind::BO_Div: { if (x->getType()->isIntegerType()) { if (x->getType()->isUnsignedIntegerOrEnumerationType()) - return pushValue(builder.create(loc, lhs, rhs)); - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::DivUIOp::create(builder, loc, lhs, rhs)); + return pushValue(arith::DivSIOp::create(builder, loc, lhs, rhs)); } if (x->getType()->isFloatingType()) - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::DivFOp::create(builder, loc, lhs, rhs)); TODO_loc(loc, "error in bo_div binary op"); } case clang::BinaryOperatorKind::BO_Shl: - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::ShLIOp::create(builder, loc, lhs, rhs)); case clang::BinaryOperatorKind::BO_Shr: if (x->getLHS()->getType()->isUnsignedIntegerOrEnumerationType()) - return pushValue(builder.create(loc, lhs, rhs)); - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(mlir::arith::ShRUIOp::create(builder, loc, lhs, rhs)); + return 
pushValue(mlir::arith::ShRSIOp::create(builder, loc, lhs, rhs)); case clang::BinaryOperatorKind::BO_Or: - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::OrIOp::create(builder, loc, lhs, rhs)); case clang::BinaryOperatorKind::BO_Xor: - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::XOrIOp::create(builder, loc, lhs, rhs)); case clang::BinaryOperatorKind::BO_And: - return pushValue(builder.create(loc, lhs, rhs)); + return pushValue(arith::AndIOp::create(builder, loc, lhs, rhs)); case clang::BinaryOperatorKind::BO_LAnd: case clang::BinaryOperatorKind::BO_LOr: emitFatalError(loc, "&& and || ops are handled elsewhere."); @@ -991,14 +995,14 @@ bool QuakeBridgeVisitor::TraverseConditionalOperator( return; } Value resultVal = popValue(); - builder.create(loc, TypeRange{}, resultVal); + cc::ContinueOp::create(builder, loc, TypeRange{}, resultVal); resultTy = resultVal.getType(); }; }; - auto ifOp = builder.create(loc, TypeRange{resultTy}, condVal, - thenElseLambda(x->getTrueExpr()), - thenElseLambda(x->getFalseExpr())); + auto ifOp = cc::IfOp::create(builder, loc, TypeRange{resultTy}, condVal, + thenElseLambda(x->getTrueExpr()), + thenElseLambda(x->getFalseExpr())); if (!result) return result; @@ -1026,8 +1030,8 @@ bool QuakeBridgeVisitor::VisitMaterializeTemporaryExpr( return true; // Materialize the value into a glvalue location in memory. 
- auto materialize = builder.create(loc, ty); - builder.create(loc, popValue(), materialize); + auto materialize = cc::AllocaOp::create(builder, loc, ty); + cc::StoreOp::create(builder, loc, popValue(), materialize); return pushValue(materialize); } @@ -1042,8 +1046,8 @@ bool QuakeBridgeVisitor::TraverseLambdaExpr(clang::LambdaExpr *x, if (!TraverseType(x->getType())) return false; auto callableTy = cast(popType()); - auto lambdaInstance = builder.create( - loc, callableTy, [&](OpBuilder &builder, Location loc) { + auto lambdaInstance = cc::CreateLambdaOp::create( + builder, loc, callableTy, [&](OpBuilder &builder, Location loc) { // FIXME: the capture list, etc. should be visited in an appropriate // context here, not as part of lowering the body of the lambda. auto *entryBlock = builder.getInsertionBlock(); @@ -1053,7 +1057,7 @@ bool QuakeBridgeVisitor::TraverseLambdaExpr(clang::LambdaExpr *x, result = false; return; } - builder.create(loc); + cc::ReturnOp::create(builder, loc); }); pushValue(lambdaInstance); return result; @@ -1086,7 +1090,7 @@ bool QuakeBridgeVisitor::VisitMemberExpr(clang::MemberExpr *x) { std::int32_t offset = field->getFieldIndex(); if (isa(object.getType())) { return pushValue( - builder.create(loc, ty, object, offset)); + quake::GetMemberOp::create(builder, loc, ty, object, offset)); } if (!isa(object.getType())) { reportClangError(x, mangler, @@ -1099,8 +1103,8 @@ bool QuakeBridgeVisitor::VisitMemberExpr(clang::MemberExpr *x) { if (arrTy.isUnknownSize()) offsets.push_back(0); offsets.push_back(offset); - return pushValue(builder.create( - loc, cc::PointerType::get(ty), object, offsets)); + return pushValue(cc::ComputePtrOp::create( + builder, loc, cc::PointerType::get(ty), object, offsets)); } return true; } @@ -1112,7 +1116,7 @@ bool QuakeBridgeVisitor::VisitUnaryExprOrTypeTraitExpr( switch (x->getKind()) { case clang::UnaryExprOrTypeTrait::UETT_SizeOf: return pushValue( - builder.create(loc, i64Ty, popType())); + 
cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, popType())); default: break; } @@ -1147,16 +1151,16 @@ bool QuakeBridgeVisitor::visitMathLibFunc(clang::CallExpr *x, auto resTy = calleeTy.getResult(0); castToSameType(builder, loc, x->getArg(0)->getType().getTypePtrOrNull(), base, x->getArg(1)->getType().getTypePtrOrNull(), power); - auto ipow = builder.create(loc, base, power); + auto ipow = math::IPowIOp::create(builder, loc, base, power); if (isa(resTy)) - return pushValue(builder.create( - loc, resTy, ipow, cudaq::cc::CastOpMode::Signed)); + return pushValue(cudaq::cc::CastOp::create( + builder, loc, resTy, ipow, cudaq::cc::CastOpMode::Signed)); assert(resTy == ipow.getType()); return pushValue(ipow); } - return pushValue(builder.create(loc, base, power)); + return pushValue(math::FPowIOp::create(builder, loc, base, power)); } - return pushValue(builder.create(loc, base, power)); + return pushValue(math::PowFOp::create(builder, loc, base, power)); } auto floatOperator = [&](Op, const char *dblName) -> bool { @@ -1164,14 +1168,14 @@ bool QuakeBridgeVisitor::visitMathLibFunc(clang::CallExpr *x, Value arg = popValue(); [[maybe_unused]] auto funcConst = popValue(); if (isa(arg.getType())) - arg = builder.create( - loc, + arg = cudaq::cc::CastOp::create( + builder, loc, funcName == dblName ? builder.getF64Type() : builder.getF32Type(), arg, x->getArg(0)->getType()->isUnsignedIntegerOrEnumerationType() ? 
cudaq::cc::CastOpMode::Unsigned : cudaq::cc::CastOpMode::Signed); - return pushValue(builder.create(loc, arg)); + return pushValue(Op::create(builder, loc, arg)); }; // Handle std::sqrt @@ -1191,8 +1195,8 @@ bool QuakeBridgeVisitor::visitMathLibFunc(clang::CallExpr *x, Value arg = popValue(); [[maybe_unused]] auto funcConst = popValue(); if (isa(arg.getType())) - return pushValue(builder.create(loc, arg)); - return pushValue(builder.create(loc, arg)); + return pushValue(math::AbsIOp::create(builder, loc, arg)); + return pushValue(math::AbsFOp::create(builder, loc, arg)); } // Handle std::sin @@ -1253,13 +1257,13 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (isInClassInNamespace(func, "complex", "std")) { auto value = popValue(); if (isa(value.getType())) - value = builder.create(loc, value); + value = cc::LoadOp::create(builder, loc, value); if (funcName == "real") { if (auto memberCall = dyn_cast(x)) if (memberCall->getImplicitObjectArgument()) { [[maybe_unused]] auto calleeTy = popType(); assert(isa(calleeTy)); - return pushValue(builder.create(loc, value)); + return pushValue(complex::ReOp::create(builder, loc, value)); } } if (funcName == "imag") { @@ -1267,7 +1271,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (memberCall->getImplicitObjectArgument()) { [[maybe_unused]] auto calleeTy = popType(); assert(isa(calleeTy)); - return pushValue(builder.create(loc, value)); + return pushValue(complex::ImOp::create(builder, loc, value)); } } } @@ -1280,9 +1284,9 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { // Get the size of the std::vector. 
auto svec = popValue(); if (isa(svec.getType())) - svec = builder.create(loc, svec); + svec = cc::LoadOp::create(builder, loc, svec); auto ext = - builder.create(loc, builder.getI64Type(), svec); + cc::StdvecSizeOp::create(builder, loc, builder.getI64Type(), svec); if (funcName == "size") if (auto memberCall = dyn_cast(x)) if (memberCall->getImplicitObjectArgument()) { @@ -1295,9 +1299,9 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (memberCall->getImplicitObjectArgument()) { [[maybe_unused]] auto calleeTy = popType(); assert(isa(calleeTy)); - return pushValue(builder.create( - ext->getLoc(), arith::CmpIPredicate(arith::CmpIPredicate::eq), - ext.getResult(), + return pushValue(mlir::arith::CmpIOp::create( + builder, ext->getLoc(), + arith::CmpIPredicate(arith::CmpIPredicate::eq), ext.getResult(), getConstantInt( builder, ext->getLoc(), 0, ext.getResult().getType().getIntOrFloatBitWidth()))); @@ -1310,7 +1314,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto eleTy = cast(svec.getType()).getElementType(); auto elePtrTy = cc::PointerType::get(eleTy); return pushValue( - builder.create(loc, elePtrTy, svec)); + cc::StdvecDataOp::create(builder, loc, elePtrTy, svec)); } if (funcName == "back" || funcName == "rbegin") if (auto memberCall = dyn_cast(x)) @@ -1323,12 +1327,12 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto elePtrTy = cc::PointerType::get(eleTy); auto *ctx = eleTy.getContext(); auto i64Ty = mlir::IntegerType::get(ctx, 64); - auto vecPtr = builder.create(loc, eleArrTy, svec); - auto vecLen = builder.create(loc, i64Ty, svec); + auto vecPtr = cc::StdvecDataOp::create(builder, loc, eleArrTy, svec); + auto vecLen = cc::StdvecSizeOp::create(builder, loc, i64Ty, svec); Value vecLenMinusOne = - builder.create(loc, vecLen, negativeOneIndex); - return pushValue(builder.create( - loc, elePtrTy, vecPtr, ValueRange{vecLenMinusOne})); + arith::AddIOp::create(builder, loc, vecLen, negativeOneIndex); + return 
pushValue(cc::ComputePtrOp::create( + builder, loc, elePtrTy, vecPtr, ValueRange{vecLenMinusOne})); } if (funcName == "end") if (auto memberCall = dyn_cast(x)) @@ -1340,24 +1344,24 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); auto *ctx = eleTy.getContext(); auto i64Ty = mlir::IntegerType::get(ctx, 64); - auto vecPtr = builder.create(loc, eleArrTy, svec); - Value vecLen = builder.create(loc, i64Ty, svec); - return pushValue(builder.create( - loc, elePtrTy, vecPtr, ValueRange{vecLen})); + auto vecPtr = cc::StdvecDataOp::create(builder, loc, eleArrTy, svec); + Value vecLen = cc::StdvecSizeOp::create(builder, loc, i64Ty, svec); + return pushValue(cc::ComputePtrOp::create( + builder, loc, elePtrTy, vecPtr, ValueRange{vecLen})); } if (funcName == "rend") if (auto memberCall = dyn_cast(x)) if (memberCall->getImplicitObjectArgument()) { [[maybe_unused]] auto calleeTy = popType(); assert(isa(calleeTy)); - Value negativeOneIndex = - builder.create(loc, -1, 64); + Value negativeOneIndex = arith::ConstantIntOp::create( + builder, loc, builder.getI64Type(), -1); auto eleTy = cast(svec.getType()).getElementType(); auto elePtrTy = cc::PointerType::get(eleTy); auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); - auto vecPtr = builder.create(loc, eleArrTy, svec); - return pushValue(builder.create( - loc, elePtrTy, vecPtr, ValueRange{negativeOneIndex})); + auto vecPtr = cc::StdvecDataOp::create(builder, loc, eleArrTy, svec); + return pushValue(cc::ComputePtrOp::create( + builder, loc, elePtrTy, vecPtr, ValueRange{negativeOneIndex})); } if (funcName == "data") if (auto memberCall = dyn_cast(x)) @@ -1368,7 +1372,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto eleTy = cast(svec.getType()).getElementType(); auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); return pushValue( - builder.create(loc, eleArrTy, svec)); + cc::StdvecDataOp::create(builder, loc, 
eleArrTy, svec)); } TODO_loc(loc, "unhandled std::vector member function, " + funcName); @@ -1380,9 +1384,9 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { // Calling std::_Bit_reference::method(). auto loadFromReference = [&](mlir::Value ref) -> Value { if (auto mrTy = dyn_cast(ref.getType())) { - auto loadVal = builder.create(loc, ref); + auto loadVal = cc::LoadOp::create(builder, loc, ref); if (mrTy.getElementType() == builder.getI8Type()) - return builder.create(loc, builder.getI1Type(), loadVal); + return cc::CastOp::create(builder, loc, builder.getI1Type(), loadVal); return loadVal; } assert(ref.getType() == builder.getI1Type()); @@ -1390,7 +1394,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { }; if (isa(func)) { assert(isa(peekValue().getType())); - return pushValue(builder.create(loc, popValue())); + return pushValue(cc::LoadOp::create(builder, loc, popValue())); } if (func->isOverloadedOperator()) { auto overloadedOperator = func->getOverloadedOperator(); @@ -1398,8 +1402,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto rhs = loadFromReference(popValue()); auto lhs = loadFromReference(popValue()); popValue(); // The compare equal operator address. 
- return pushValue(builder.create( - loc, arith::CmpIPredicate::eq, lhs, rhs)); + return pushValue(arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::eq, lhs, rhs)); } if (isAssignmentOperator(overloadedOperator)) { auto rhs = loadFromReference(popValue()); @@ -1412,10 +1416,10 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (auto arrTy = dyn_cast(eleTy)) eleTy = arrTy.getElementType(); if (eleTy != rhs.getType()) - rhs = builder.create(loc, eleTy, rhs, - cc::CastOpMode::Unsigned); + rhs = cc::CastOp::create(builder, loc, eleTy, rhs, + cc::CastOpMode::Unsigned); } - builder.create(loc, rhs, lhs); + cc::StoreOp::create(builder, loc, rhs, lhs); return pushValue(loadFromReference(lhs)); } if (isSubscriptOperator(overloadedOperator)) { @@ -1424,7 +1428,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { popValue(); // The subscript operator address. auto bytePtrTy = cc::PointerType::get(builder.getI8Type()); return pushValue( - builder.create(loc, bytePtrTy, lhs, rhs)); + cc::ComputePtrOp::create(builder, loc, bytePtrTy, lhs, rhs)); } } TODO_loc(loc, "unhandled std::vector member function, " + funcName); @@ -1442,8 +1446,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { [[maybe_unused]] auto calleeTy = popType(); assert(isa(calleeTy)); auto qregArg = popValue(); - auto qrSize = builder.create( - loc, builder.getI64Type(), qregArg); + auto qrSize = quake::VeqSizeOp::create(builder, loc, + builder.getI64Type(), qregArg); return pushValue(qrSize); } @@ -1459,15 +1463,15 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { // Handle `r.front(n)` case. 
auto qrSize = actArgs.front(); auto one = getConstantInt(builder, loc, 1, 64); - auto offset = builder.create(loc, qrSize, one); + auto offset = arith::SubIOp::create(builder, loc, qrSize, one); auto unsizedVecTy = quake::VeqType::getUnsized(builder.getContext()); - return pushValue(builder.create( - loc, unsizedVecTy, qregArg, zero, offset)); + return pushValue(quake::SubVeqOp::create(builder, loc, unsizedVecTy, + qregArg, zero, offset)); } assert(actArgs.size() == 0); return pushValue( - builder.create(loc, qregArg, zero)); + quake::ExtractRefOp::create(builder, loc, qregArg, zero)); } if (funcName == "back") @@ -1477,22 +1481,22 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { assert(isa(calleeTy)); auto actArgs = lastValues(x->getNumArgs()); auto qregArg = popValue(); - auto qrSize = builder.create( - loc, builder.getI64Type(), qregArg); + auto qrSize = quake::VeqSizeOp::create(builder, loc, + builder.getI64Type(), qregArg); auto one = getConstantInt(builder, loc, 1, 64); - auto endOff = builder.create(loc, qrSize, one); + auto endOff = arith::SubIOp::create(builder, loc, qrSize, one); if (actArgs.size() == 1) { // Handle `r.back(n)` case. 
auto startOff = - builder.create(loc, qrSize, actArgs.front()); + arith::SubIOp::create(builder, loc, qrSize, actArgs.front()); auto unsizedVecTy = quake::VeqType::getUnsized(builder.getContext()); - return pushValue(builder.create( - loc, unsizedVecTy, qregArg, startOff, endOff)); + return pushValue(quake::SubVeqOp::create( + builder, loc, unsizedVecTy, qregArg, startOff, endOff)); } assert(actArgs.size() == 0); return pushValue( - builder.create(loc, qregArg, endOff)); + quake::ExtractRefOp::create(builder, loc, qregArg, endOff)); } if (funcName == "slice") { @@ -1506,11 +1510,11 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto count = actArgs[1]; auto one = getConstantInt(builder, loc, 1, 64); - Value offset = builder.create(loc, start, count); - offset = builder.create(loc, offset, one); + Value offset = arith::AddIOp::create(builder, loc, start, count); + offset = arith::SubIOp::create(builder, loc, offset, one); auto unsizedVecTy = quake::VeqType::getUnsized(builder.getContext()); - return pushValue(builder.create( - loc, unsizedVecTy, qregArg, start, offset)); + return pushValue(quake::SubVeqOp::create(builder, loc, unsizedVecTy, + qregArg, start, offset)); } } @@ -1556,7 +1560,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { pauliWord = load.getPtrvalue(); } else if (isCharspanPointerType(v.getType())) { // Load the char span, which is a char* - auto span = builder.create(loc, v); + auto span = cc::LoadOp::create(builder, loc, v); pauliWord = span; } else if (isa(v.getType())) { pauliWord = v; @@ -1580,14 +1584,15 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { SmallVector quantumArgs; for (std::size_t i = 2; i < args.size(); i++) quantumArgs.push_back(args[i]); - targets.push_back(builder.create( - loc, quake::VeqType::get(builder.getContext(), quantumArgs.size()), + targets.push_back(quake::ConcatOp::create( + builder, loc, + quake::VeqType::get(builder.getContext(), quantumArgs.size()), quantumArgs)); 
addTheString(args[1]); } - builder.create(loc, parameters, ValueRange{}, targets, - pauliWord); + quake::ExpPauliOp::create(builder, loc, parameters, ValueRange{}, targets, + pauliWord); return true; } @@ -1627,7 +1632,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (auto callee = calleeOp.getDefiningOp()) { StringRef calleeName = callee.getValue(); - builder.create(loc, calleeName, params, qubits); + quake::ApplyNoiseOp::create(builder, loc, calleeName, params, qubits); // Add the declaration of the function to the module. SmallVector argTys; @@ -1655,16 +1660,16 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (useStdvec) measTy = cc::StdvecType::get(measTy); if (funcName == "mx") - return builder.create(loc, measTy, args).getMeasOut(); + return quake::MxOp::create(builder, loc, measTy, args).getMeasOut(); if (funcName == "my") - return builder.create(loc, measTy, args).getMeasOut(); - return builder.create(loc, measTy, args).getMeasOut(); + return quake::MyOp::create(builder, loc, measTy, args).getMeasOut(); + return quake::MzOp::create(builder, loc, measTy, args).getMeasOut(); }(); Type resTy = builder.getI1Type(); if (useStdvec) resTy = cc::StdvecType::get(resTy); return pushValue( - builder.create(loc, resTy, measure)); + quake::DiscriminateOp::create(builder, loc, resTy, measure)); } // Handle the quantum gate set. 
@@ -1729,7 +1734,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (funcName == "reset") { if (!negations.empty()) reportNegateError(); - return builder.create(loc, TypeRange{}, args[0]); + return quake::ResetOp::create(builder, loc, TypeRange{}, args[0]); } if (funcName == "swap") { const auto size = args.size(); @@ -1741,7 +1746,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { SmallVector ctrls(args.begin(), args.begin() + size - 2); auto negs = negatedControlsAttribute(builder.getContext(), ctrls, negations); - auto swap = builder.create(loc, ctrls, targets); + auto swap = quake::SwapOp::create(builder, loc, ctrls, targets); if (negs) swap->setAttr("negated_qubit_controls", negs); return true; @@ -1823,16 +1828,16 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (!negations.empty()) reportNegateError(); Type i64Ty = builder.getI64Type(); - auto size = builder.create( - loc, builder.getIntegerType(64), target); - Value rank = builder.create( - loc, i64Ty, size, cudaq::cc::CastOpMode::Unsigned); + auto size = quake::VeqSizeOp::create( + builder, loc, builder.getIntegerType(64), target); + Value rank = cudaq::cc::CastOp::create(builder, loc, i64Ty, size, + cudaq::cc::CastOpMode::Unsigned); auto bodyBuilder = [&](OpBuilder &builder, Location loc, Region &, Block &block) { - Value ref = builder.create(loc, target, - block.getArgument(0)); - builder.create(loc, srefAttr, - ValueRange(), ref); + Value ref = quake::ExtractRefOp::create(builder, loc, target, + block.getArgument(0)); + quake::CustomUnitarySymbolOp::create(builder, loc, srefAttr, + ValueRange(), ref); }; cudaq::opt::factory::createInvariantLoop(builder, loc, rank, bodyBuilder); @@ -1849,9 +1854,9 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { SmallVector params; for (auto p : operands.take_front(paramCount)) if (isa(p.getType())) - params.push_back(builder.create(loc, p)); - builder.create( - loc, srefAttr, isAdjoint, params, ctrls, 
targets, negs); + params.push_back(cudaq::cc::LoadOp::create(builder, loc, p)); + quake::CustomUnitarySymbolOp::create(builder, loc, srefAttr, isAdjoint, + params, ctrls, targets, negs); } return true; } @@ -1942,9 +1947,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto kernelArgs = convertKernelArgs(loc, 2, args, kernelTy.getInputs(), x); inlinedStartControlNegations(); - builder.create(loc, TypeRange{}, calleeSymbol, - /*isAdjoint=*/false, ctrlValues, - kernelArgs); + quake::ApplyOp::create(builder, loc, TypeRange{}, calleeSymbol, + /*isAdjoint=*/false, ctrlValues, kernelArgs); return inlinedFinishControlNegations(); } if (auto func = calleeValue.getDefiningOp()) { @@ -1953,9 +1957,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { inlinedStartControlNegations(); auto kernelArgs = convertKernelArgs(loc, 2, args, funcTy.getInputs(), x); - builder.create(loc, funcTy.getResults(), callableSym, - /*isAdjoint=*/false, ctrlValues, - kernelArgs); + quake::ApplyOp::create(builder, loc, funcTy.getResults(), callableSym, + /*isAdjoint=*/false, ctrlValues, kernelArgs); return inlinedFinishControlNegations(); } if (auto ty = dyn_cast(calleeValue.getType())) { @@ -1994,13 +1997,13 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto kernelArgs = convertKernelArgs(loc, 2, args, funcTy.getInputs(), x); if (isKernelEntryPoint(callOperDecl)) { - builder.create( - loc, funcTy.getResults(), calleeSymbol, - /*isAdjoint=*/false, ctrlValues, kernelArgs); + quake::ApplyOp::create(builder, loc, funcTy.getResults(), + calleeSymbol, + /*isAdjoint=*/false, ctrlValues, kernelArgs); } else { - builder.create( - loc, funcTy.getResults(), calleeValue, - /*isAdjoint=*/false, ctrlValues, kernelArgs); + quake::ApplyOp::create(builder, loc, funcTy.getResults(), + calleeValue, + /*isAdjoint=*/false, ctrlValues, kernelArgs); } return inlinedFinishControlNegations(); } @@ -2054,17 +2057,17 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr 
*x) { assert(kernFunc && "kernel call operator must be present"); auto kernTy = kernFunc.getFunctionType(); auto kernArgs = convertKernelArgs(loc, 1, args, kernTy.getInputs(), x); - return builder.create(loc, TypeRange{}, kernelSymbol, - /*isAdjoint=*/true, ValueRange{}, - kernArgs); + return quake::ApplyOp::create(builder, loc, TypeRange{}, kernelSymbol, + /*isAdjoint=*/true, ValueRange{}, + kernArgs); } if (auto func = kernelValue.getDefiningOp()) { auto kernSym = func.getValueAttr(); auto funcTy = cast(func.getType()); auto kernArgs = convertKernelArgs(loc, 1, args, funcTy.getInputs(), x); - return builder.create(loc, funcTy.getResults(), kernSym, - /*isAdjoint=*/true, ValueRange{}, - kernArgs); + return quake::ApplyOp::create( + builder, loc, funcTy.getResults(), kernSym, + /*isAdjoint=*/true, ValueRange{}, kernArgs); } if (auto ty = dyn_cast(kernelTy)) { // In order to autogenerate the control form of the called kernel, we @@ -2100,12 +2103,12 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto kernelArgs = convertKernelArgs(loc, 1, args, funcTy.getInputs(), x); if (isKernelEntryPoint(callOperDecl)) { - return builder.create( - loc, funcTy.getResults(), kernelSymbol, + return quake::ApplyOp::create( + builder, loc, funcTy.getResults(), kernelSymbol, /*isAdjoint=*/true, ValueRange{}, kernelArgs); } - return builder.create( - loc, funcTy.getResults(), kernelValue, + return quake::ApplyOp::create( + builder, loc, funcTy.getResults(), kernelValue, /*isAdjoint=*/true, ValueRange{}, kernelArgs); } TODO_loc(loc, "value has !cc.lambda type but decl isn't a lambda"); @@ -2114,13 +2117,13 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { } if (funcName == "compute_action") { - builder.create(loc, /*is_dagger=*/false, args[0], - args[1]); + quake::ComputeActionOp::create(builder, loc, /*is_dagger=*/false, args[0], + args[1]); return true; } if (funcName == "compute_dag_action") { - builder.create(loc, /*is_dagger=*/true, args[0], - 
args[1]); + quake::ComputeActionOp::create(builder, loc, /*is_dagger=*/true, args[0], + args[1]); return true; } @@ -2132,7 +2135,7 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { } auto i64Ty = builder.getI64Type(); return pushValue( - builder.create(loc, i64Ty, cudaqConvertToInteger, args) + func::CallOp::create(builder, loc, i64Ty, cudaqConvertToInteger, args) .getResult(0)); } @@ -2146,21 +2149,21 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (eleTy == builder.getI1Type()) { eleTy = cc::ArrayType::get(builder.getI8Type()); ptrTy = cc::PointerType::get(eleTy); - vecPtr = builder.create(loc, ptrTy, args[0]); + vecPtr = cc::StdvecDataOp::create(builder, loc, ptrTy, args[0]); auto bits = svecTy.getElementType().getIntOrFloatBitWidth(); assert(bits > 0); - auto scale = builder.create(loc, (bits + 7) / 8, - args[1].getType()); - offset = builder.create(loc, scale, args[1]); + auto scale = arith::ConstantIntOp::create( + builder, loc, args[1].getType(), (bits + 7) / 8); + offset = arith::MulIOp::create(builder, loc, scale, args[1]); } else { ptrTy = cc::PointerType::get(eleTy); auto arrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); - vecPtr = builder.create(loc, arrTy, args[0]); + vecPtr = cc::StdvecDataOp::create(builder, loc, arrTy, args[0]); } - auto ptr = builder.create(loc, ptrTy, vecPtr, - ArrayRef{offset}); + auto ptr = cc::ComputePtrOp::create(builder, loc, ptrTy, vecPtr, + ArrayRef{offset}); return pushValue( - builder.create(loc, svecTy, ptr, args[2])); + cc::StdvecInitOp::create(builder, loc, svecTy, ptr, args[2])); } if (funcName == "range") { @@ -2171,31 +2174,33 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { irBuilder.loadIntrinsic(module, setCudaqRangeVector); assert(succeeded(result) && "loading intrinsic should never fail"); auto upVal = args[0]; - auto upper = builder.create(loc, i64Ty, upVal, - cc::CastOpMode::Unsigned); - auto buffer = builder.create(loc, i64Ty, upper); + auto upper = 
cc::CastOp::create(builder, loc, i64Ty, upVal, + cc::CastOpMode::Unsigned); + auto buffer = cc::AllocaOp::create(builder, loc, i64Ty, upper); auto stdvecTy = cc::StdvecType::get(i64Ty); - auto call = builder.create( - loc, stdvecTy, setCudaqRangeVector, ValueRange{buffer, upper}); + auto call = + func::CallOp::create(builder, loc, stdvecTy, setCudaqRangeVector, + ValueRange{buffer, upper}); return pushValue(call.getResult(0)); } assert(funcArity == 3); [[maybe_unused]] auto result = irBuilder.loadIntrinsic(module, setCudaqRangeVectorTriple); assert(succeeded(result) && "loading intrinsic should never fail"); - Value start = builder.create(loc, i64Ty, args[0], - cc::CastOpMode::Signed); - Value stop = builder.create(loc, i64Ty, args[1], - cc::CastOpMode::Signed); - Value step = builder.create(loc, i64Ty, args[2], - cc::CastOpMode::Signed); - auto lengthCall = builder.create( - loc, i64Ty, getCudaqSizeFromTriple, ValueRange{start, stop, step}); + Value start = cc::CastOp::create(builder, loc, i64Ty, args[0], + cc::CastOpMode::Signed); + Value stop = cc::CastOp::create(builder, loc, i64Ty, args[1], + cc::CastOpMode::Signed); + Value step = cc::CastOp::create(builder, loc, i64Ty, args[2], + cc::CastOpMode::Signed); + auto lengthCall = + func::CallOp::create(builder, loc, i64Ty, getCudaqSizeFromTriple, + ValueRange{start, stop, step}); Value length = lengthCall.getResult(0); - auto buffer = builder.create(loc, i64Ty, length); + auto buffer = cc::AllocaOp::create(builder, loc, i64Ty, length); auto stdvecTy = cc::StdvecType::get(i64Ty); - auto call = - builder.create(loc, stdvecTy, setCudaqRangeVectorTriple, + auto call = func::CallOp::create(builder, loc, stdvecTy, + setCudaqRangeVectorTriple, ValueRange{buffer, start, stop, step}); return pushValue(call.getResult(0)); } @@ -2252,8 +2257,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { continue; } // Resolve the raw pointer from this device_ptr handle. 
- Value result = builder.create( - loc, devFuncTy.getInputs()[i - argsOffset], args[i]); + Value result = cc::ResolveDevicePtrOp::create( + builder, loc, devFuncTy.getInputs()[i - argsOffset], args[i]); processedArgs.push_back(result); } @@ -2263,16 +2268,18 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto devCall = [&]() { if (maybeGPULaunchParams) { auto [numBlocks, numThreads] = maybeGPULaunchParams.value(); - Value blocks = - builder.create(loc, numBlocks, 64); - Value threadsPerBlock = - builder.create(loc, numThreads, 64); - return builder.create( - loc, devFuncTy.getResults(), symbol, ValueRange{blocks}, - ValueRange{threadsPerBlock}, deviceId, callArgs); + Value blocks = arith::ConstantIntOp::create( + builder, loc, builder.getI64Type(), numBlocks); + Value threadsPerBlock = arith::ConstantIntOp::create( + builder, loc, builder.getI64Type(), numThreads); + return cc::DeviceCallOp::create(builder, loc, devFuncTy.getResults(), + symbol, ValueRange{blocks}, + ValueRange{threadsPerBlock}, deviceId, + callArgs, ArrayAttr{}, ArrayAttr{}); } - return builder.create(loc, devFuncTy.getResults(), - symbol, deviceId, callArgs); + return cc::DeviceCallOp::create( + builder, loc, devFuncTy.getResults(), symbol, ValueRange{}, + ValueRange{}, deviceId, callArgs, ArrayAttr{}, ArrayAttr{}); }(); if (devFuncTy.getResults().empty()) return true; @@ -2292,8 +2299,8 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { // buffer. Create a loop that interchanges pairs as $(a_0, a_1-1)$, // $(a_0+1, a_1-2)$, ... until $a_0 + n \ge a_1 - n - 1$. 
auto i64Ty = builder.getI64Type(); - auto hiInt = builder.create(loc, i64Ty, args[1]); - auto loInt = builder.create(loc, i64Ty, args[0]); + auto hiInt = cc::CastOp::create(builder, loc, i64Ty, args[1]); + auto loInt = cc::CastOp::create(builder, loc, i64Ty, args[0]); auto ptrTy = cast(args[0].getType()); auto eleTy = ptrTy.getElementType(); auto arrTy = dyn_cast(eleTy); @@ -2305,30 +2312,30 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { } auto eleSize = eleTy.getIntOrFloatBitWidth(); auto adjust = getConstantInt(builder, loc, eleSize / 4, i64Ty); - auto dist = builder.create(loc, hiInt, loInt); - Value iters = builder.create(loc, dist, adjust); + auto dist = arith::SubIOp::create(builder, loc, hiInt, loInt); + Value iters = arith::DivSIOp::create(builder, loc, dist, adjust); auto ptrArrTy = cc::PointerType::get(arrTy); - Value basePtr = builder.create(loc, ptrArrTy, args[0]); + Value basePtr = cc::CastOp::create(builder, loc, ptrArrTy, args[0]); auto bodyBuilder = [&](OpBuilder &builder, Location loc, Region &, Block &block) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToStart(&block); auto iterIdx = block.getArgument(0); auto ptrA = - builder.create(loc, ptrTy, basePtr, iterIdx); - auto one = builder.create(loc, 1, i64Ty); - auto iters1 = builder.create(loc, iters, one); - Value hiIdx = builder.create(loc, iters1, iterIdx); + cc::ComputePtrOp::create(builder, loc, ptrTy, basePtr, iterIdx); + auto one = arith::ConstantIntOp::create(builder, loc, i64Ty, 1); + auto iters1 = arith::SubIOp::create(builder, loc, iters, one); + Value hiIdx = arith::SubIOp::create(builder, loc, iters1, iterIdx); auto ptrB = - builder.create(loc, ptrTy, basePtr, hiIdx); - Value loadA = builder.create(loc, ptrA); - Value loadB = builder.create(loc, ptrB); - builder.create(loc, loadA, ptrB); - builder.create(loc, loadB, ptrA); + cc::ComputePtrOp::create(builder, loc, ptrTy, basePtr, hiIdx); + Value loadA = cc::LoadOp::create(builder, loc, ptrA); + 
Value loadB = cc::LoadOp::create(builder, loc, ptrB); + cc::StoreOp::create(builder, loc, loadA, ptrB); + cc::StoreOp::create(builder, loc, loadB, ptrA); }; auto idxTy = builder.getI64Type(); - auto idxIters = builder.create( - loc, idxTy, iters, cudaq::cc::CastOpMode::Unsigned); + auto idxIters = cudaq::cc::CastOp::create( + builder, loc, idxTy, iters, cudaq::cc::CastOpMode::Unsigned); opt::factory::createInvariantLoop(builder, loc, idxIters, bodyBuilder); return true; } @@ -2350,9 +2357,10 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { if (specArgs[0].getKind() == clang::TemplateArgument::ArgKind::Integral) { std::int32_t offset = specArgs[0].getAsIntegral().getExtValue(); fixIfTuple(offset); - auto ptr = builder.create( - loc, resultTy, args[0], ArrayRef{offset}); - return pushValue(builder.create(loc, ptr)); + auto ptr = + cc::ComputePtrOp::create(builder, loc, resultTy, args[0], + ArrayRef{offset}); + return pushValue(cc::LoadOp::create(builder, loc, ptr)); } auto *selectTy = specArgs[0].getAsType().getTypePtr(); assert(specArgs[1].getKind() == clang::TemplateArgument::ArgKind::Pack); @@ -2360,9 +2368,10 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { for (auto &templateArg : specArgs[1].pack_elements()) { if (templateArg.getAsType().getTypePtr() == selectTy) { fixIfTuple(offset); - auto ptr = builder.create( - loc, resultTy, args[0], ArrayRef{offset}); - return pushValue(builder.create(loc, ptr)); + auto ptr = + cc::ComputePtrOp::create(builder, loc, resultTy, args[0], + ArrayRef{offset}); + return pushValue(cc::LoadOp::create(builder, loc, ptr)); } ++offset; } @@ -2381,8 +2390,9 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { auto funcResults = mlirFuncTy.getResults(); auto convertedArgs = convertKernelArgs(loc, 0, args, mlirFuncTy.getInputs(), x); - auto call = builder.create(loc, funcResults, calleeOp, - convertedArgs); + auto call = + func::CallIndirectOp::create(builder, loc, funcResults, calleeOp, + 
convertedArgs, ArrayAttr{}, ArrayAttr{}); if (call.getNumResults() > 0) { if (call.getNumResults() != 1) { reportClangError(x, mangler, "expect exactly one return value"); @@ -2399,22 +2409,22 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { return builder.getI8Type(); return et; }(); - auto data = builder.create( - loc, cudaq::cc::PointerType::get(eleTy), call.getResult(0)); + auto data = cudaq::cc::StdvecDataOp::create( + builder, loc, cudaq::cc::PointerType::get(eleTy), call.getResult(0)); auto i64Ty = builder.getI64Type(); - auto len = builder.create(loc, i64Ty, - call.getResult(0)); - auto eleSize = builder.create(loc, i64Ty, eleTy); - auto size = builder.create(loc, len, eleSize); - auto buffer = builder.create(loc, eleTy, size); + auto len = cudaq::cc::StdvecSizeOp::create(builder, loc, i64Ty, + call.getResult(0)); + auto eleSize = cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, eleTy); + auto size = arith::MulIOp::create(builder, loc, len, eleSize); + auto buffer = cudaq::cc::AllocaOp::create(builder, loc, eleTy, size); auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - auto cbuffer = builder.create(loc, i8PtrTy, buffer); - auto cdata = builder.create(loc, i8PtrTy, data); - builder.create(loc, TypeRange{}, - "__nvqpp_vectorCopyToStack", - ValueRange{cbuffer, cdata, size}); + auto cbuffer = cudaq::cc::CastOp::create(builder, loc, i8PtrTy, buffer); + auto cdata = cudaq::cc::CastOp::create(builder, loc, i8PtrTy, data); + func::CallOp::create(builder, loc, TypeRange{}, + "__nvqpp_vectorCopyToStack", + ValueRange{cbuffer, cdata, size}); Value newSpan = - builder.create(loc, vecTy, buffer, len); + cudaq::cc::StdvecInitOp::create(builder, loc, vecTy, buffer, len); return pushValue(newSpan); } return pushValue(call.getResult(0)); @@ -2495,7 +2505,7 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( // extract `Op` to the symbol table, but always generate a new // `quake.extract_ref` `Op` to get the exact qubit (reference) value. 
auto address_qubit = - builder.create(loc, qreg_var, idx_var); + quake::ExtractRefOp::create(builder, loc, qreg_var, idx_var); return replaceTOSValue(address_qubit); } // Get name of the qreg, e.g. qr, and use it to construct a name for the @@ -2515,7 +2525,7 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( // in the symbol table, and return the AddressQubit operation's // resulting value. auto address_qubit = - builder.create(loc, qreg_var, idx_var); + quake::ExtractRefOp::create(builder, loc, qreg_var, idx_var); // NB: varName is built from the variable name *and* the index value. This // front-end optimization is likely unnecessary as the compiler can always @@ -2529,7 +2539,7 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( auto indexVar = popValue(); auto svec = popValue(); if (isa(svec.getType())) - svec = builder.create(loc, svec); + svec = cc::LoadOp::create(builder, loc, svec); if (!isa(svec.getType())) { TODO_x(loc, x, mangler, "vector dereference"); return false; @@ -2539,9 +2549,9 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( eleTy = builder.getI8Type(); auto elePtrTy = cc::PointerType::get(eleTy); auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); - auto vecPtr = builder.create(loc, eleArrTy, svec); - auto eleAddr = builder.create(loc, elePtrTy, vecPtr, - ValueRange{indexVar}); + auto vecPtr = cc::StdvecDataOp::create(builder, loc, eleArrTy, svec); + auto eleAddr = cc::ComputePtrOp::create(builder, loc, elePtrTy, vecPtr, + ValueRange{indexVar}); return replaceTOSValue(eleAddr); } if (typeName == "_Bit_reference" || typeName == "__bit_reference" || @@ -2555,11 +2565,11 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( auto i8Ty = builder.getI8Type(); auto elePtrTy = cc::PointerType::get(i8Ty); auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(i8Ty)); - auto vecPtr = builder.create(loc, eleArrTy, svec); - auto eleAddr = builder.create(loc, elePtrTy, vecPtr, - ValueRange{indexVar}); + auto vecPtr = 
cc::StdvecDataOp::create(builder, loc, eleArrTy, svec); + auto eleAddr = cc::ComputePtrOp::create(builder, loc, elePtrTy, vecPtr, + ValueRange{indexVar}); auto i1PtrTy = cc::PointerType::get(builder.getI1Type()); - auto i1Cast = builder.create(loc, i1PtrTy, eleAddr); + auto i1Cast = cudaq::cc::CastOp::create(builder, loc, i1PtrTy, eleAddr); return replaceTOSValue(i1Cast); } TODO_loc(loc, "unhandled operator call for quake conversion"); @@ -2582,8 +2592,9 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( auto funcTy = cast(indirect.getType()); auto convertedArgs = convertKernelArgs(loc, 0, args, funcTy.getInputs(), x); - auto call = builder.create( - loc, funcTy.getResults(), indirect, convertedArgs); + auto call = func::CallIndirectOp::create( + builder, loc, funcTy.getResults(), indirect, convertedArgs, + ArrayAttr{}, ArrayAttr{}); if (call.getResults().empty()) return true; return pushValue(call.getResult(0)); @@ -2599,15 +2610,15 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( if (indCallTy) { [[maybe_unused]] auto discardedCallOp = popValue(); auto funcTy = cast(indCallTy.getSignature()); - auto call = builder.create( - loc, funcTy.getResults(), tos, args); + auto call = cc::CallIndirectCallableOp::create( + builder, loc, funcTy.getResults(), tos, args); if (call.getResults().empty()) return true; return pushValue(call.getResult(0)); } auto callableTy = cast(tosTy); - auto callInd = builder.create( - loc, callableTy.getSignature().getResults(), tos, args); + auto callInd = cc::CallCallableOp::create( + builder, loc, callableTy.getSignature().getResults(), tos, args); if (callInd.getResults().empty()) { popValue(); return true; @@ -2695,7 +2706,7 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { auto initListTy = popType(); if (size == 0) { // Nothing in the list. Just allocate the type. 
- return pushValue(builder.create(loc, initListTy)); + return pushValue(cc::AllocaOp::create(builder, loc, initListTy)); } // List has 1 or more members. @@ -2709,7 +2720,8 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { return isa(v.getType()); }); if (allRef && isa(initListTy)) - return pushValue(builder.create(loc, initListTy, last)); + return pushValue( + quake::MakeStruqOp::create(builder, loc, initListTy, last)); if (allRef && !isa(initListTy)) { // Initializer list contains all quantum reference types. In this case we @@ -2728,7 +2740,7 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { } return quake::VeqType::get(builder.getContext(), size); }(); - return pushValue(builder.create(loc, veqTy, last)); + return pushValue(quake::ConcatOp::create(builder, loc, veqTy, last)); } // Pass initialization list with one member as a Ref. return pushValue(last[0]); @@ -2740,7 +2752,8 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { std::int32_t structMems = structTy ? structTy.getMembers().size() : 0; std::int32_t numEles = structMems ? size / structMems : size; // Generate the array size value. - Value arrSize = builder.create(loc, numEles, 64); + Value arrSize = + arith::ConstantIntOp::create(builder, loc, builder.getI64Type(), numEles); // Allocate the required memory chunk. 
Type eleTy = [&]() { @@ -2775,23 +2788,22 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(module.getBody()); - builder - .create(loc, globalTy, name, f64Attr, - /*constant=*/true, /*external=*/false) + cc::GlobalOp::create(builder, loc, globalTy, name, f64Attr, + /*constant=*/true, /*external=*/false) .setPrivate(); } auto ptrTy = cc::PointerType::get(globalTy); - auto globalInit = builder.create(loc, ptrTy, name); + auto globalInit = cc::AddressOfOp::create(builder, loc, ptrTy, name); return pushValue(globalInit); } // If quantum, use value semantics with cc insert / extract value. if (isa(eleTy)) - return pushValue(builder.create(loc, eleTy, last)); + return pushValue(quake::MakeStruqOp::create(builder, loc, eleTy, last)); Value alloca = (numEles > 1) - ? builder.create(loc, eleTy, arrSize) - : builder.create(loc, eleTy); + ? cc::AllocaOp::create(builder, loc, eleTy, arrSize) + : cc::AllocaOp::create(builder, loc, eleTy); // Store the values in the allocated memory for (auto iter : llvm::enumerate(last)) { @@ -2802,32 +2814,32 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { if (numEles > 1) { auto ptrTy = cc::PointerType::get(structTy.getMembers()[i % structMems]); - ptr = builder.create( - loc, ptrTy, alloca, + ptr = cc::ComputePtrOp::create( + builder, loc, ptrTy, alloca, ArrayRef{i / structMems, i % structMems}); } else { auto ptrTy = cc::PointerType::get(structTy.getMembers()[i]); - ptr = builder.create(loc, ptrTy, alloca, - ArrayRef{i}); + ptr = cc::ComputePtrOp::create(builder, loc, ptrTy, alloca, + ArrayRef{i}); } } else { if (numEles > 1) { auto ptrTy = cc::PointerType::get(eleTy); - ptr = builder.create(loc, ptrTy, alloca, - ArrayRef{i}); + ptr = cc::ComputePtrOp::create(builder, loc, ptrTy, alloca, + ArrayRef{i}); } else { auto arrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); - auto cast = builder.create(loc, arrTy, alloca); 
+ auto cast = cc::CastOp::create(builder, loc, arrTy, alloca); auto ptrTy = cc::PointerType::get(eleTy); - ptr = builder.create(loc, ptrTy, cast, - ArrayRef{i}); + ptr = cc::ComputePtrOp::create(builder, loc, ptrTy, cast, + ArrayRef{i}); } } assert(ptr && (v.getType() == cast(ptr.getType()).getElementType()) && "value type must match pointer element type"); - builder.create(loc, v, ptr); + cc::StoreOp::create(builder, loc, v, ptr); } return pushValue(alloca); @@ -2889,7 +2901,7 @@ bool QuakeBridgeVisitor::VisitCXXParenListInitExpr( return true; auto loc = toLocation(x); auto last = lastValues(structTy.getMembers().size()); - return pushValue(builder.create(loc, structTy, last)); + return pushValue(quake::MakeStruqOp::create(builder, loc, structTy, last)); } bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { @@ -2907,28 +2919,29 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (ctorName == "qudit") { // This is a single qubit. assert(isa(ctorTy)); - return pushValue(builder.create(loc)); + return pushValue(quake::AllocaOp::create(builder, loc)); } // These classes have template arguments that may give a compile-time // constant size. qarray is the only one that requires it, however. if (ctorName == "qreg" || ctorName == "qarray" || ctorName == "qspan") { [[maybe_unused]] auto veqTy = cast(ctorTy); assert(veqTy.hasSpecifiedSize()); - return pushValue(builder.create(loc, ctorTy)); + return pushValue(quake::AllocaOp::create(builder, loc, ctorTy)); } if (ctorName == "qvector") { // The default qvector ctor creates a veq of size 1. assert(isa(ctorTy)); auto veq1Ty = quake::VeqType::get(builder.getContext(), 1); - return pushValue(builder.create(loc, veq1Ty)); + return pushValue(quake::AllocaOp::create(builder, loc, veq1Ty)); } } else if (x->getNumArgs() == 1) { if (ctorName == "qreg") { // This is a cudaq::qreg(std::size_t). 
auto sizeVal = popValue(); assert(isa(sizeVal.getType())); - return pushValue(builder.create( - loc, quake::VeqType::getUnsized(builder.getContext()), sizeVal)); + return pushValue(quake::AllocaOp::create( + builder, loc, quake::VeqType::getUnsized(builder.getContext()), + sizeVal)); } if (ctorName == "state") { @@ -2941,17 +2954,17 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (auto stdvecTy = dyn_cast(stdvec.getType())) { auto dataTy = cudaq::cc::PointerType::get(stdvecTy.getElementType()); Value data = - builder.create(loc, dataTy, stdvec); + cudaq::cc::StdvecDataOp::create(builder, loc, dataTy, stdvec); auto i64Ty = builder.getI64Type(); Value size = - builder.create(loc, i64Ty, stdvec); - return pushValue(builder.create( - loc, stateTy, ValueRange{data, size})); + cudaq::cc::StdvecSizeOp::create(builder, loc, i64Ty, stdvec); + return pushValue(quake::CreateStateOp::create( + builder, loc, stateTy, ValueRange{data, size})); } if (auto alloc = stdvec.getDefiningOp()) { Value size = alloc.getSeqSize(); - return pushValue(builder.create( - loc, stateTy, ValueRange{alloc, size})); + return pushValue(quake::CreateStateOp::create( + builder, loc, stateTy, ValueRange{alloc, size})); } TODO_loc(loc, "unhandled state constructor"); return false; @@ -2970,13 +2983,14 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (auto load = initials.getDefiningOp()) initials = load.getPtrvalue(); if (isStateType(initials.getType())) { - Value alloca = builder.create(loc); + Value alloca = quake::AllocaOp::create(builder, loc); auto veq1Ty = quake::VeqType::get(builder.getContext(), 1); - Value initSt = builder.create( - loc, veq1Ty, ValueRange{alloca, initials}); + Value initSt = quake::InitializeStateOp::create( + builder, loc, veq1Ty, ValueRange{alloca, initials}); if (auto initOp = initials.getDefiningOp()) - builder.create(loc, initOp); - return pushValue(builder.create(loc, initSt, 0)); + 
quake::DeleteStateOp::create(builder, loc, initOp); + return pushValue( + quake::ExtractRefOp::create(builder, loc, initSt, 0)); } bool ok = false; if (auto ptrTy = dyn_cast(initials.getType())) @@ -2985,22 +2999,22 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (!ok) { // Invalid initializer ignored, but emit an error. reportClangError(x, mangler, "invalid qudit initial value"); - return pushValue(builder.create(loc)); + return pushValue(quake::AllocaOp::create(builder, loc)); } auto *ctx = builder.getContext(); auto veqTy = quake::VeqType::get(ctx, 1); - auto alloc = builder.create(loc, veqTy); - auto init = builder.create(loc, veqTy, alloc, - initials); - return pushValue(builder.create(loc, init, 0)); + auto alloc = quake::AllocaOp::create(builder, loc, veqTy); + auto init = quake::InitializeStateOp::create(builder, loc, veqTy, alloc, + initials); + return pushValue(quake::ExtractRefOp::create(builder, loc, init, 0)); } if (ctorName == "qvector") { auto initials = popValue(); auto *ctx = builder.getContext(); if (isa(initials.getType())) { // This is the cudaq::qvector(std::size_t) ctor. 
- return pushValue(builder.create( - loc, quake::VeqType::getUnsized(ctx), initials)); + return pushValue(quake::AllocaOp::create( + builder, loc, quake::VeqType::getUnsized(ctx), initials)); } if (isa(initials.getType())) if (auto load = initials.getDefiningOp()) @@ -3009,13 +3023,13 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { Value state = initials; auto i64Ty = builder.getI64Type(); auto numQubits = - builder.create(loc, i64Ty, state); + quake::GetNumberOfQubitsOp::create(builder, loc, i64Ty, state); auto veqTy = quake::VeqType::getUnsized(ctx); - Value alloc = builder.create(loc, veqTy, numQubits); - Value initSt = builder.create(loc, veqTy, - alloc, state); + Value alloc = quake::AllocaOp::create(builder, loc, veqTy, numQubits); + Value initSt = quake::InitializeStateOp::create(builder, loc, veqTy, + alloc, state); if (auto initOp = initials.getDefiningOp()) - builder.create(loc, initOp); + quake::DeleteStateOp::create(builder, loc, initOp); return pushValue(initSt); } @@ -3028,23 +3042,24 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (auto allocOp = initials.getDefiningOp()) if (auto size = allocOp.getSeqSize()) numQubits = - builder.create(loc, size); + math::CountTrailingZerosOp::create(builder, loc, size); } else { std::size_t arraySize = arrTy.getSize(); if (!std::has_single_bit(arraySize)) { reportClangError(x, mangler, "state vector must be a power of 2 in length"); } - numQubits = builder.create( - loc, std::countr_zero(arraySize), 64); + numQubits = arith::ConstantIntOp::create( + builder, loc, builder.getI64Type(), + std::countr_zero(arraySize)); } } } else if (auto stdvecTy = dyn_cast(initialsTy)) { - Value vecLen = builder.create( - loc, builder.getI64Type(), initials); - numQubits = builder.create(loc, vecLen); + Value vecLen = cc::StdvecSizeOp::create( + builder, loc, builder.getI64Type(), initials); + numQubits = math::CountTrailingZerosOp::create(builder, loc, vecLen); 
auto ptrTy = cc::PointerType::get(stdvecTy.getElementType()); - initials = builder.create(loc, ptrTy, initials); + initials = cc::StdvecDataOp::create(builder, loc, ptrTy, initials); } if (!numQubits) { reportClangError( @@ -3053,9 +3068,9 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { return false; } auto veqTy = quake::VeqType::getUnsized(ctx); - auto alloc = builder.create(loc, veqTy, numQubits); - return pushValue(builder.create( - loc, veqTy, alloc, initials)); + auto alloc = quake::AllocaOp::create(builder, loc, veqTy, numQubits); + return pushValue(quake::InitializeStateOp::create(builder, loc, veqTy, + alloc, initials)); } if ((ctorName == "qspan" || ctorName == "qview") && isa(peekValue().getType())) { @@ -3082,8 +3097,8 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (ctorName == "complex") { Value imag = popValue(); Value real = popValue(); - return pushValue(builder.create( - loc, ComplexType::get(real.getType()), real, imag)); + return pushValue(mlir::complex::CreateOp::create( + builder, loc, ComplexType::get(real.getType()), real, imag)); } if (ctorName == "function") { // Are we converting a lambda expr to a std::function? @@ -3101,10 +3116,8 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { TODO_loc(loc, "callable class with data members"); } // Constructor generated as degenerate reference to call operator. - auto *fromTy = x->getArg(0)->getType().getTypePtr(); - // FIXME: May need to peel off more than one layer of sugar? 
- if (auto *elabTy = dyn_cast(fromTy)) - fromTy = elabTy->desugar().getTypePtr(); + auto *fromTy = + x->getArg(0)->getType().getTypePtr()->getUnqualifiedDesugaredType(); auto *fromDecl = dyn_cast_or_null(fromTy)->getDecl(); if (!fromDecl) TODO_loc(loc, "recovering record type for a callable"); @@ -3123,13 +3136,13 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { auto kernelCallTy = cast(ctorTy); auto kernelName = generateCudaqKernelName(callOperDecl); popValue(); // replace value at TOS. - return pushValue(builder.create( - loc, kernelCallTy, [&](OpBuilder &builder, Location loc) { + return pushValue(cc::CreateLambdaOp::create( + builder, loc, kernelCallTy, [&](OpBuilder &builder, Location loc) { auto args = builder.getBlock()->getArguments(); - auto call = builder.create( - loc, kernelCallTy.getSignature().getResults(), kernelName, - args); - builder.create(loc, call.getResults()); + auto call = func::CallOp::create( + builder, loc, kernelCallTy.getSignature().getResults(), + kernelName, args); + cc::ReturnOp::create(builder, loc, call.getResults()); })); } } @@ -3154,8 +3167,8 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (auto ptrTy = dyn_cast(allocation.getType())) if (auto arrayTy = dyn_cast(ptrTy.getElementType())) if (auto definingOp = allocation.getDefiningOp()) - return pushValue(builder.create( - loc, cc::StdvecType::get(arrayTy.getElementType()), + return pushValue(cc::StdvecInitOp::create( + builder, loc, cc::StdvecType::get(arrayTy.getElementType()), allocation, definingOp.getSeqSize())); } @@ -3175,11 +3188,11 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { // memory chunk. Type ty = (eleTy == builder.getI1Type()) ? 
builder.getI8Type() : eleTy; - Value alloca = builder.create(loc, ty, arrSize); + Value alloca = cc::AllocaOp::create(builder, loc, ty, arrSize); // Create the stdvec_init op - return pushValue(builder.create( - loc, cc::StdvecType::get(eleTy), alloca, arrSize)); + return pushValue(cc::StdvecInitOp::create( + builder, loc, cc::StdvecType::get(eleTy), alloca, arrSize)); } return false; }; @@ -3217,7 +3230,7 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (isa(ctorTy)) { if (quake::isConstantQuantumRefType(ctorTy)) - return pushValue(builder.create(loc, ctorTy)); + return pushValue(quake::AllocaOp::create(builder, loc, ctorTy)); return true; } @@ -3238,17 +3251,17 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { // contain the object to load the value from. auto fromStruct = popValue(); assert(isa(ctorTy) && "POD must be a struct type"); - return pushValue(builder.create(loc, fromStruct)); + return pushValue(cc::LoadOp::create(builder, loc, fromStruct)); } } if (ctor->isCopyConstructor() && ctor->isTrivial() && isa(ctorTy)) { - auto copyObj = builder.create(loc, ctorTy); + auto copyObj = cc::AllocaOp::create(builder, loc, ctorTy); auto fromStruct = popValue(); - auto fromVal = builder.create(loc, fromStruct); - builder.create(loc, fromVal, copyObj); - return pushValue(builder.create(loc, copyObj)); + auto fromVal = cc::LoadOp::create(builder, loc, fromStruct); + cc::StoreOp::create(builder, loc, fromVal, copyObj); + return pushValue(cc::LoadOp::create(builder, loc, copyObj)); } // TODO: remove this when we can handle ctors more generally. @@ -3264,7 +3277,7 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { // 2) Allocate a new object. // 3) If not POD, call the constructor passing the address of the allocation // as `this`. 
- auto mem = builder.create(loc, ctorTy); + auto mem = cc::AllocaOp::create(builder, loc, ctorTy); // No constructor call needed for POD types if (parent->isPOD()) @@ -3278,7 +3291,7 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { FunctionType::get(builder.getContext(), TypeRange{mem.getType()}, {}); auto func = getOrAddFunc(loc, mangledName, funcTy).first; // FIXME: The ctor may not be the default ctor. Get all the args. - builder.create(loc, func, ValueRange{mem}); + func::CallOp::create(builder, loc, func, ValueRange{mem}); return pushValue(mem); } @@ -3330,8 +3343,8 @@ bool QuakeBridgeVisitor::VisitDeclRefExpr(clang::DeclRefExpr *x) { bool QuakeBridgeVisitor::VisitStringLiteral(clang::StringLiteral *x) { auto strLitTy = cc::PointerType::get(cc::ArrayType::get( builder.getContext(), builder.getI8Type(), x->getString().size() + 1)); - return pushValue(builder.create( - toLocation(x), strLitTy, builder.getStringAttr(x->getString()))); + return pushValue(cc::CreateStringLiteralOp::create( + builder, toLocation(x), strLitTy, builder.getStringAttr(x->getString()))); } } // namespace cudaq::details diff --git a/lib/Frontend/nvqpp/ConvertStmt.cpp b/lib/Frontend/nvqpp/ConvertStmt.cpp index 54bd9ca50ec..8512cca0b83 100644 --- a/lib/Frontend/nvqpp/ConvertStmt.cpp +++ b/lib/Frontend/nvqpp/ConvertStmt.cpp @@ -28,7 +28,7 @@ bool QuakeBridgeVisitor::VisitBreakStmt(clang::BreakStmt *x) { // statement. The bridge does not currently support switch statements. LLVM_DEBUG(llvm::dbgs() << "%% "; x->dump()); if (builder.getBlock()) - builder.create(toLocation(x)); + cc::UnwindBreakOp::create(builder, toLocation(x)); return true; } @@ -36,7 +36,7 @@ bool QuakeBridgeVisitor::VisitContinueStmt(clang::ContinueStmt *x) { // It is a C++ syntax error if a continue statement is not in a loop. 
LLVM_DEBUG(llvm::dbgs() << "%% "; x->dump()); if (builder.getBlock()) - builder.create(toLocation(x)); + cc::UnwindContinueOp::create(builder, toLocation(x)); return true; } @@ -69,53 +69,53 @@ bool QuakeBridgeVisitor::VisitCompoundAssignOperator( switch (x->getOpcode()) { case clang::BinaryOperatorKind::BO_AddAssign: { if (x->getType()->isIntegerType()) - return builder.create(loc, lhs, rhs); + return mlir::arith::AddIOp::create(builder, loc, lhs, rhs); if (x->getType()->isFloatingType()) - return builder.create(loc, lhs, rhs); + return mlir::arith::AddFOp::create(builder, loc, lhs, rhs); TODO_loc(loc, "Unknown type in assignment operator"); } case clang::BinaryOperatorKind::BO_SubAssign: { if (x->getType()->isIntegerType()) - return builder.create(loc, lhs, rhs); + return mlir::arith::SubIOp::create(builder, loc, lhs, rhs); if (x->getType()->isFloatingType()) - return builder.create(loc, lhs, rhs); + return mlir::arith::SubFOp::create(builder, loc, lhs, rhs); TODO_loc(loc, "Unknown type in assignment operator"); } case clang::BinaryOperatorKind::BO_MulAssign: { if (x->getType()->isIntegerType()) - return builder.create(loc, lhs, rhs); + return mlir::arith::MulIOp::create(builder, loc, lhs, rhs); if (x->getType()->isFloatingType()) - return builder.create(loc, lhs, rhs); + return mlir::arith::MulFOp::create(builder, loc, lhs, rhs); TODO_loc(loc, "Unknown type in assignment operator"); } case clang::BinaryOperatorKind::BO_DivAssign: { if (x->getType()->isIntegerType()) if (x->getType()->isUnsignedIntegerOrEnumerationType()) - return builder.create(loc, lhs, rhs); - return builder.create(loc, lhs, rhs); + return mlir::arith::DivUIOp::create(builder, loc, lhs, rhs); + return mlir::arith::DivSIOp::create(builder, loc, lhs, rhs); if (x->getType()->isFloatingType()) - return builder.create(loc, lhs, rhs); + return mlir::arith::DivFOp::create(builder, loc, lhs, rhs); TODO_loc(loc, "Unknown type in assignment operator"); } case clang::BinaryOperatorKind::BO_ShlAssign: - 
return builder.create(loc, lhs, rhs); + return mlir::arith::ShLIOp::create(builder, loc, lhs, rhs); case clang::BinaryOperatorKind::BO_ShrAssign: if (x->getType()->isUnsignedIntegerOrEnumerationType()) - return builder.create(loc, lhs, rhs); - return builder.create(loc, lhs, rhs); + return mlir::arith::ShRUIOp::create(builder, loc, lhs, rhs); + return mlir::arith::ShRSIOp::create(builder, loc, lhs, rhs); case clang::BinaryOperatorKind::BO_OrAssign: - return builder.create(loc, lhs, rhs); + return mlir::arith::OrIOp::create(builder, loc, lhs, rhs); case clang::BinaryOperatorKind::BO_XorAssign: - return builder.create(loc, lhs, rhs); + return mlir::arith::XOrIOp::create(builder, loc, lhs, rhs); case clang::BinaryOperatorKind::BO_AndAssign: - return builder.create(loc, lhs, rhs); + return mlir::arith::AndIOp::create(builder, loc, lhs, rhs); default: break; } TODO_loc(loc, "assignment operator"); }(); - builder.create(loc, result, lhsPtr); + cudaq::cc::StoreOp::create(builder, loc, result, lhsPtr); return pushValue(lhsPtr); } @@ -151,7 +151,7 @@ bool QuakeBridgeVisitor::TraverseCXXForRangeStmt(clang::CXXForRangeStmt *x, auto [iters, ptr, initial, stepBy] = [&]() -> std::tuple { if (auto call = buffer.getDefiningOp()) { - if (call.getCallee().equals(setCudaqRangeVector)) { + if (call.getCallee() == setCudaqRangeVector) { // The std::vector was produced by cudaq::range(). Optimize this // special case to use the loop control directly. Erase the transient // buffer and call here since neither is required. @@ -164,7 +164,7 @@ bool QuakeBridgeVisitor::TraverseCXXForRangeStmt(clang::CXXForRangeStmt *x, call->erase(); } return {i, {}, {}, {}}; - } else if (call.getCallee().equals(setCudaqRangeVectorTriple)) { + } else if (call.getCallee() == setCudaqRangeVectorTriple) { // Save operands before erasing the call. 
Value initial = call.getOperand(1); Value i = call.getOperand(2); @@ -173,7 +173,7 @@ bool QuakeBridgeVisitor::TraverseCXXForRangeStmt(clang::CXXForRangeStmt *x, Operation *callGetSizeOp = nullptr; if (auto seqSize = alloc.getSeqSize()) { if (auto callSize = seqSize.getDefiningOp()) - if (callSize.getCallee().equals(getCudaqSizeFromTriple)) + if (callSize.getCallee() == getCudaqSizeFromTriple) callGetSizeOp = callSize.getOperation(); } call->erase(); // erase call must be first @@ -187,8 +187,8 @@ bool QuakeBridgeVisitor::TraverseCXXForRangeStmt(clang::CXXForRangeStmt *x, return {i, {}, initial, stepBy}; } } - Value i = builder.create(loc, i64Ty, buffer); - Value p = builder.create(loc, dataArrPtrTy, buffer); + Value i = cc::StdvecSizeOp::create(builder, loc, i64Ty, buffer); + Value p = cc::StdvecDataOp::create(builder, loc, dataArrPtrTy, buffer); return {i, p, {}, {}}; }(); @@ -206,7 +206,7 @@ bool QuakeBridgeVisitor::TraverseCXXForRangeStmt(clang::CXXForRangeStmt *x, symbolTable.insert(loopVar->getName(), index); } else { Value addr = - builder.create(loc, dataPtrTy, ptr, index); + cc::ComputePtrOp::create(builder, loc, dataPtrTy, ptr, index); if (loopVar->getType().isConstQualified()) { // Read-only binding, so omit copy. 
symbolTable.insert(loopVar->getName(), addr); @@ -220,48 +220,48 @@ bool QuakeBridgeVisitor::TraverseCXXForRangeStmt(clang::CXXForRangeStmt *x, return; } auto iterVar = popValue(); - Value atOffset = builder.create(loc, addr); + Value atOffset = cc::LoadOp::create(builder, loc, addr); if (isBool) - atOffset = builder.create(loc, builder.getI1Type(), - atOffset); - builder.create(loc, atOffset, iterVar); + atOffset = cc::CastOp::create(builder, loc, builder.getI1Type(), + atOffset); + cc::StoreOp::create(builder, loc, atOffset, iterVar); } } if (!TraverseStmt(static_cast(body))) { result = false; return; } - builder.create(loc); + cc::ContinueOp::create(builder, loc); }; - builder.create(loc, scopeBuilder); + cc::ScopeOp::create(builder, loc, scopeBuilder); }; if (!initial) { - auto idxIters = builder.create( - loc, i64Ty, iters, cudaq::cc::CastOpMode::Unsigned); + auto idxIters = cudaq::cc::CastOp::create( + builder, loc, i64Ty, iters, cudaq::cc::CastOpMode::Unsigned); opt::factory::createInvariantLoop(builder, loc, idxIters, bodyBuilder); } else { - auto idxIters = builder.create( - loc, i64Ty, iters, cudaq::cc::CastOpMode::Signed); + auto idxIters = cudaq::cc::CastOp::create(builder, loc, i64Ty, iters, + cudaq::cc::CastOpMode::Signed); opt::factory::createMonotonicLoop(builder, loc, initial, idxIters, stepBy, bodyBuilder); } } else if (auto veqTy = dyn_cast(buffer.getType()); veqTy && veqTy.hasSpecifiedSize()) { - Value iters = - builder.create(loc, veqTy.getSize(), i64Ty); + Value iters = arith::ConstantIntOp::create( + builder, loc, i64Ty, static_cast(veqTy.getSize())); auto bodyBuilder = [&](OpBuilder &builder, Location loc, Region ®ion, Block &block) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToStart(&block); Value index = block.getArgument(0); - Value ref = builder.create(loc, buffer, index); + Value ref = quake::ExtractRefOp::create(builder, loc, buffer, index); symbolTable.insert(loopVar->getName(), ref); if 
(!TraverseStmt(static_cast(body))) result = false; }; - auto idxIters = builder.create( - loc, i64Ty, iters, cudaq::cc::CastOpMode::Unsigned); + auto idxIters = cudaq::cc::CastOp::create(builder, loc, i64Ty, iters, + cudaq::cc::CastOpMode::Unsigned); opt::factory::createInvariantLoop(builder, loc, idxIters, bodyBuilder); } else { TODO_x(toLocation(x), x, mangler, "ranged for statement"); @@ -327,13 +327,13 @@ bool QuakeBridgeVisitor::VisitReturnStmt(clang::ReturnStmt *x) { if (isa(resTy)) { // Promote reference (T&) to value (T) on a return. (There is not // necessarily an explicit cast or promotion node in the AST.) - auto load = builder.create(loc, result); + auto load = cc::LoadOp::create(builder, loc, result); result = load.getResult(); if (load.getType() == builder.getI8Type()) { auto fnTy = load->getParentOfType().getFunctionType(); auto i1Ty = builder.getI1Type(); if (fnTy.getNumResults() == 1 && fnTy.getResult(0) == i1Ty) - result = builder.create(loc, i1Ty, result); + result = cc::CastOp::create(builder, loc, i1Ty, result); } } if (auto vecTy = dyn_cast(resTy)) { @@ -346,16 +346,15 @@ bool QuakeBridgeVisitor::VisitReturnStmt(clang::ReturnStmt *x) { auto eleTy = vecTy.getElementType(); auto createVectorInit = [&](Value eleSize) { auto ptrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - Value resBuff = builder.create(loc, ptrTy, result); - Value dynSize = - builder.create(loc, builder.getI64Type(), result); + Value resBuff = cc::StdvecDataOp::create(builder, loc, ptrTy, result); + Value dynSize = cc::StdvecSizeOp::create(builder, loc, + builder.getI64Type(), result); Value heapCopy = - builder - .create(loc, ptrTy, "__nvqpp_vectorCopyCtor", - ValueRange{resBuff, dynSize, eleSize}) + func::CallOp::create(builder, loc, ptrTy, "__nvqpp_vectorCopyCtor", + ValueRange{resBuff, dynSize, eleSize}) .getResult(0); - return builder.create(loc, resTy, - ValueRange{heapCopy, dynSize}); + return cc::StdvecInitOp::create(builder, loc, resTy, + ValueRange{heapCopy, 
dynSize}); }; IRBuilder irb(builder); Value tySize; @@ -371,15 +370,15 @@ bool QuakeBridgeVisitor::VisitReturnStmt(clang::ReturnStmt *x) { result = createVectorInit(tySize); } if (isFuncScope) - builder.create(loc, result); + cc::ReturnOp::create(builder, loc, result); else - builder.create(loc, result); + cc::UnwindReturnOp::create(builder, loc, result); return true; } if (isFuncScope) - builder.create(loc); + cc::ReturnOp::create(builder, loc); else - builder.create(loc); + cc::UnwindReturnOp::create(builder, loc); return true; } @@ -407,10 +406,10 @@ bool QuakeBridgeVisitor::TraverseCompoundStmt(clang::CompoundStmt *stmt, traverseAndCheck(static_cast(cs)); return true; } - builder.create(loc, [&](OpBuilder &builder, Location loc) { + cc::ScopeOp::create(builder, loc, [&](OpBuilder &builder, Location loc) { for (auto *cs : stmt->body()) traverseAndCheck(static_cast(cs)); - builder.create(loc); + cc::ContinueOp::create(builder, loc); }); return true; } @@ -433,7 +432,7 @@ bool QuakeBridgeVisitor::traverseDoOrWhileStmt(S *x) { return; } auto val = popValue(); - builder.create(loc, val, ValueRange{}); + cc::ConditionOp::create(builder, loc, val, ValueRange{}); }; auto *body = x->getBody(); auto bodyBuilder = [&](OpBuilder &builder, Location loc, Region ®ion) { @@ -448,11 +447,11 @@ bool QuakeBridgeVisitor::traverseDoOrWhileStmt(S *x) { return; } if (!hasTerminator(region.back())) - builder.create(loc); + cc::ContinueOp::create(builder, loc); }; LLVM_DEBUG(llvm::dbgs() << "%% "; x->dump()); - builder.create(loc, ValueRange{}, postCondition, whileBuilder, - bodyBuilder); + cc::LoopOp::create(builder, loc, ValueRange{}, postCondition, whileBuilder, + bodyBuilder); return result; } @@ -483,27 +482,26 @@ bool QuakeBridgeVisitor::TraverseIfStmt(clang::IfStmt *x, return; } if (!hasTerminator(region.back())) - builder.create(loc); + cc::ContinueOp::create(builder, loc); }; }; auto *cond = x->getCond(); assert(cond && "if statement should have a condition"); 
LLVM_DEBUG(llvm::dbgs() << "%% "; x->dump()); if (auto *init = x->getInit()) { - builder.create(loc, [&](OpBuilder &builder, Location loc) { + cc::ScopeOp::create(builder, loc, [&](OpBuilder &builder, Location loc) { SymbolTableScope varScope(symbolTable); if (!TraverseStmt(init) || !TraverseStmt(cond)) { result = false; return; } if (x->getElse()) - builder.create(loc, TypeRange{}, popValue(), - stmtBuilder(x->getThen()), - stmtBuilder(x->getElse())); + cc::IfOp::create(builder, loc, TypeRange{}, popValue(), + stmtBuilder(x->getThen()), stmtBuilder(x->getElse())); else - builder.create(loc, TypeRange{}, popValue(), - stmtBuilder(x->getThen())); - builder.create(loc); + cc::IfOp::create(builder, loc, TypeRange{}, popValue(), + stmtBuilder(x->getThen())); + cc::ContinueOp::create(builder, loc); }); } else { // If there is no initialization expression, skip creating an `if` scope. @@ -516,19 +514,18 @@ bool QuakeBridgeVisitor::TraverseIfStmt(clang::IfStmt *x, // and add the required a load and cast. 
if (auto ptrTy = dyn_cast(peekValue().getType())) { Value v = popValue(); - pushValue(builder.create(loc, v)); + pushValue(cc::LoadOp::create(builder, loc, v)); if (ptrTy != builder.getI1Type()) { reportClangError(x, mangler, "expression in condition not yet supported"); } } if (x->getElse()) - builder.create(loc, TypeRange{}, popValue(), - stmtBuilder(x->getThen()), - stmtBuilder(x->getElse())); + cc::IfOp::create(builder, loc, TypeRange{}, popValue(), + stmtBuilder(x->getThen()), stmtBuilder(x->getElse())); else - builder.create(loc, TypeRange{}, popValue(), - stmtBuilder(x->getThen())); + cc::IfOp::create(builder, loc, TypeRange{}, popValue(), + stmtBuilder(x->getThen())); } return result; } @@ -551,7 +548,7 @@ bool QuakeBridgeVisitor::TraverseForStmt(clang::ForStmt *x, return; } auto val = popValue(); - builder.create(loc, val, ValueRange{}); + cc::ConditionOp::create(builder, loc, val, ValueRange{}); }; auto *body = x->getBody(); auto bodyBuilder = [&](OpBuilder &builder, Location loc, Region ®ion) { @@ -566,7 +563,7 @@ bool QuakeBridgeVisitor::TraverseForStmt(clang::ForStmt *x, return; } if (!hasTerminator(region.back())) - builder.create(loc); + cc::ContinueOp::create(builder, loc); }; auto *incr = x->getInc(); auto stepBuilder = [&](OpBuilder &builder, Location loc, Region ®ion) { @@ -584,19 +581,19 @@ bool QuakeBridgeVisitor::TraverseForStmt(clang::ForStmt *x, LLVM_DEBUG(llvm::dbgs() << "%% "; x->dump()); if (auto *init = x->getInit()) { SymbolTableScope var_scope(symbolTable); - builder.create(loc, [&](OpBuilder &builder, Location loc) { + cc::ScopeOp::create(builder, loc, [&](OpBuilder &builder, Location loc) { if (!TraverseStmt(static_cast(init))) { result = false; return; } - builder.create(loc, ValueRange{}, postCondition, whileBuilder, - bodyBuilder, stepBuilder); - builder.create(loc); + cc::LoopOp::create(builder, loc, ValueRange{}, postCondition, + whileBuilder, bodyBuilder, stepBuilder); + cc::ContinueOp::create(builder, loc); }); } else { // If 
there is no initialization expression, skip creating a `for` scope. - builder.create(loc, ValueRange{}, postCondition, whileBuilder, - bodyBuilder); + cc::LoopOp::create(builder, loc, ValueRange{}, postCondition, whileBuilder, + bodyBuilder); } const auto finalValueDepth = valueStack.size(); if (finalValueDepth > initialValueDepth) { diff --git a/lib/Frontend/nvqpp/ConvertType.cpp b/lib/Frontend/nvqpp/ConvertType.cpp index c21ef8d56a9..e38b6d47329 100644 --- a/lib/Frontend/nvqpp/ConvertType.cpp +++ b/lib/Frontend/nvqpp/ConvertType.cpp @@ -175,7 +175,8 @@ QuakeBridgeVisitor::findCallOperator(const clang::CXXRecordDecl *decl) { return nullptr; } -bool QuakeBridgeVisitor::TraverseRecordType(clang::RecordType *t) { +bool QuakeBridgeVisitor::TraverseRecordType(clang::RecordType *t, + bool &visitChildren) { auto *recDecl = t->getDecl(); if (ignoredClass(recDecl)) @@ -222,10 +223,10 @@ std::pair QuakeBridgeVisitor::getWidthAndAlignment(clang::RecordDecl *x) { auto *defn = x->getDefinition(); assert(defn && "struct must be defined here"); - auto *ty = defn->getTypeForDecl(); - if (ty->isDependentType()) + auto qualTy = getContext()->getCanonicalTagType(defn); + if (qualTy->isDependentType()) return {0, 0}; - auto ti = getContext()->getTypeInfo(ty); + auto ti = getContext()->getTypeInfo(qualTy); return {ti.Width, llvm::PowerOf2Ceil(ti.Align) / 8}; } diff --git a/lib/Optimizer/Builder/Factory.cpp b/lib/Optimizer/Builder/Factory.cpp index 12ab0feb5ca..e804a169c3f 100644 --- a/lib/Optimizer/Builder/Factory.cpp +++ b/lib/Optimizer/Builder/Factory.cpp @@ -91,39 +91,43 @@ factory::buildInvokeStructType(FunctionType funcTy, return cudaq::cc::StructType::get(ctx, eleTys, /*packed=*/false); } -Value factory::packIsArrayAndLengthArray(Location loc, - ConversionPatternRewriter &rewriter, - ModuleOp parentModule, - std::size_t numOperands, - ValueRange operands) { +Value factory::packIsArrayAndLengthArray( + Location loc, ConversionPatternRewriter &rewriter, ModuleOp parentModule, + 
std::size_t numOperands, ValueRange operands, ValueRange originalControls) { // Create an integer array where the kth element is N if the kth control // operand is a veq, and 0 otherwise. auto i64Type = rewriter.getI64Type(); - auto context = rewriter.getContext(); - Value isArrayAndLengthArr = createLLVMTemporary( - loc, rewriter, LLVM::LLVMPointerType::get(i64Type), numOperands); - auto intPtrTy = LLVM::LLVMPointerType::get(i64Type); - Value zero = rewriter.create(loc, 0, 64); + auto *context = rewriter.getContext(); + auto alignment = IntegerAttr::get(i64Type, 8); + auto ptrTy = LLVM::LLVMPointerType::get(context); + Value numOpnds = arith::ConstantIntOp::create(rewriter, loc, numOperands, 64); + Value isArrayAndLengthArr = LLVM::AllocaOp::create( + rewriter, loc, ptrTy, numOpnds, alignment, TypeAttr::get(i64Type)); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); auto getSizeSymbolRef = opt::factory::createLLVMFunctionSymbol( - opt::QIRArrayGetSize, i64Type, {opt::getArrayType(context)}, + opt::QIRArrayGetSize, i64Type, {cg::getLLVMArrayType(context)}, parentModule); for (auto iter : llvm::enumerate(operands)) { auto operand = iter.value(); auto i = iter.index(); - Value idx = rewriter.create(loc, i, 64); - Value ptr = rewriter.create(loc, intPtrTy, isArrayAndLengthArr, - ValueRange{idx}); + Value idx = arith::ConstantIntOp::create(rewriter, loc, i, 64); + Value ptr = LLVM::GEPOp::create(rewriter, loc, ptrTy, i64Type, + isArrayAndLengthArr, ValueRange{idx}); Value element; - if (operand.getType() == opt::getQubitType(context)) + // With opaque pointers, both qubit (RefType) and array (VeqType) convert + // to the same !llvm.ptr type, so we must check the original quake types + // to distinguish them. 
+ bool isQubit = isa(originalControls[i].getType()); + if (isQubit) { element = zero; - else + } else { // get array size with the runtime function - element = rewriter - .create(loc, rewriter.getI64Type(), - getSizeSymbolRef, ValueRange{operand}) + element = LLVM::CallOp::create(rewriter, loc, i64Type, getSizeSymbolRef, + ValueRange{operand}) .getResult(); + } - rewriter.create(loc, element, ptr); + LLVM::StoreOp::create(rewriter, loc, element, ptr); } return isArrayAndLengthArr; } @@ -145,7 +149,7 @@ FlatSymbolRefAttr factory::createLLVMFunctionSymbol(StringRef name, // Insert the function since it hasn't been seen yet auto insPt = rewriter.saveInsertionPoint(); rewriter.setInsertionPointToStart(module.getBody()); - rewriter.create(module->getLoc(), name, fType); + LLVM::LLVMFuncOp::create(rewriter, module->getLoc(), name, fType); symbolRef = SymbolRefAttr::get(context, name); rewriter.restoreInsertionPoint(insPt); } @@ -166,7 +170,7 @@ func::FuncOp factory::createFunction(StringRef name, ArrayRef retTypes, // Insert the function since it hasn't been seen yet auto insPt = rewriter.saveInsertionPoint(); rewriter.setInsertionPointToStart(module.getBody()); - auto func = rewriter.create(module->getLoc(), name, fType); + auto func = func::FuncOp::create(rewriter, module->getLoc(), name, fType); rewriter.restoreInsertionPoint(insPt); return func; } @@ -199,40 +203,43 @@ void factory::createGlobalCtorCall(ModuleOp mod, FlatSymbolRefAttr ctor) { auto i32Ty = builder.getI32Type(); constexpr int prio = 17; auto prioAttr = ArrayAttr::get(ctx, {IntegerAttr::get(i32Ty, prio)}); - builder.create(loc, ctorAttr, prioAttr); + llvm::SmallVector data; + data.push_back(mlir::LLVM::ZeroAttr::get(mod.getContext())); + LLVM::GlobalCtorsOp::create(builder, loc, ctorAttr, prioAttr, + ArrayAttr::get(ctx, data)); } cc::LoopOp factory::createInvariantLoop( OpBuilder &builder, Location loc, Value totalIterations, llvm::function_ref bodyBuilder) { - Value zero = builder.create(loc, 0, 64); - 
Value one = builder.create(loc, 1, 64); + Value zero = arith::ConstantIntOp::create(builder, loc, 0, 64); + Value one = arith::ConstantIntOp::create(builder, loc, 1, 64); Type i64Ty = builder.getI64Type(); SmallVector inputs = {zero}; SmallVector resultTys = {i64Ty}; - auto loop = builder.create( - loc, resultTys, inputs, /*postCondition=*/false, + auto loop = cc::LoopOp::create( + builder, loc, resultTys, inputs, /*postCondition=*/false, [&](OpBuilder &builder, Location loc, Region ®ion) { cc::RegionBuilderGuard guard(builder, loc, region, TypeRange{i64Ty}); auto &block = *builder.getBlock(); - Value cmpi = builder.create( - loc, arith::CmpIPredicate::slt, block.getArgument(0), - totalIterations); - builder.create(loc, cmpi, block.getArguments()); + Value cmpi = + arith::CmpIOp::create(builder, loc, arith::CmpIPredicate::slt, + block.getArgument(0), totalIterations); + cc::ConditionOp::create(builder, loc, cmpi, block.getArguments()); }, [&](OpBuilder &builder, Location loc, Region ®ion) { cc::RegionBuilderGuard guard(builder, loc, region, TypeRange{i64Ty}); auto &block = *builder.getBlock(); bodyBuilder(builder, loc, region, block); - builder.create(loc, block.getArguments()); + cc::ContinueOp::create(builder, loc, block.getArguments()); }, [&](OpBuilder &builder, Location loc, Region ®ion) { cc::RegionBuilderGuard guard(builder, loc, region, TypeRange{i64Ty}); auto &block = *builder.getBlock(); auto incr = - builder.create(loc, block.getArgument(0), one); - builder.create(loc, ValueRange{incr}); + arith::AddIOp::create(builder, loc, block.getArgument(0), one); + cc::ContinueOp::create(builder, loc, ValueRange{incr}); }); loop->setAttr("invariant", builder.getUnitAttr()); return loop; @@ -252,7 +259,9 @@ Value factory::createLLVMTemporary(Location loc, OpBuilder &builder, Type type, OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToStart(entryBlock); Value len = genLlvmI64Constant(loc, builder, size); - return builder.create(loc, type, 
ArrayRef{len}); + return LLVM::AllocaOp::create( + builder, loc, LLVM::LLVMPointerType::get(builder.getContext()), type, + len); } Value factory::createTemporary(Location loc, OpBuilder &builder, Type type, @@ -266,8 +275,8 @@ Value factory::createTemporary(Location loc, OpBuilder &builder, Type type, assert(entryBlock && "function must have an entry block"); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToStart(entryBlock); - Value len = builder.create(loc, size, 64); - return builder.create(loc, type, len); + Value len = arith::ConstantIntOp::create(builder, loc, size, 64); + return cudaq::cc::AllocaOp::create(builder, loc, type, len); } // This builder will transform the monotonic loop into an invariant loop during @@ -284,44 +293,45 @@ cc::LoopOp factory::createMonotonicLoop( assert(succeeded(loadedIntrinsic) && "loading intrinsic should never fail"); auto i64Ty = builder.getI64Type(); Value begin = - builder.create(loc, i64Ty, start, cc::CastOpMode::Signed); + cc::CastOp::create(builder, loc, i64Ty, start, cc::CastOpMode::Signed); Value stepBy = - builder.create(loc, i64Ty, step, cc::CastOpMode::Signed); + cc::CastOp::create(builder, loc, i64Ty, step, cc::CastOpMode::Signed); Value end = - builder.create(loc, i64Ty, stop, cc::CastOpMode::Signed); - Value zero = builder.create(loc, 0, 64); + cc::CastOp::create(builder, loc, i64Ty, stop, cc::CastOpMode::Signed); + Value zero = arith::ConstantIntOp::create(builder, loc, 0, 64); SmallVector inputs = {zero, begin}; SmallVector resultTys = {i64Ty, i64Ty}; - auto totalIters = builder.create( - loc, i64Ty, getCudaqSizeFromTriple, ValueRange{begin, end, stepBy}); - auto loop = builder.create( - loc, resultTys, inputs, /*postCondition=*/false, + auto totalIters = + func::CallOp::create(builder, loc, i64Ty, getCudaqSizeFromTriple, + ValueRange{begin, end, stepBy}); + auto loop = cc::LoopOp::create( + builder, loc, resultTys, inputs, /*postCondition=*/false, [&](OpBuilder &builder, Location loc, 
Region ®ion) { cc::RegionBuilderGuard guard(builder, loc, region, TypeRange{i64Ty, i64Ty}); auto &block = *builder.getBlock(); - Value cmpi = builder.create( - loc, arith::CmpIPredicate::slt, block.getArgument(0), + Value cmpi = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::slt, block.getArgument(0), totalIters.getResult(0)); - builder.create(loc, cmpi, block.getArguments()); + cc::ConditionOp::create(builder, loc, cmpi, block.getArguments()); }, [&](OpBuilder &builder, Location loc, Region ®ion) { cc::RegionBuilderGuard guard(builder, loc, region, TypeRange{i64Ty, i64Ty}); auto &block = *builder.getBlock(); bodyBuilder(builder, loc, region, block); - builder.create(loc, block.getArguments()); + cc::ContinueOp::create(builder, loc, block.getArguments()); }, [&](OpBuilder &builder, Location loc, Region ®ion) { cc::RegionBuilderGuard guard(builder, loc, region, TypeRange{i64Ty, i64Ty}); auto &block = *builder.getBlock(); - auto one = builder.create(loc, 1, 64); + auto one = arith::ConstantIntOp::create(builder, loc, 1, 64); Value count = - builder.create(loc, block.getArgument(0), one); + arith::AddIOp::create(builder, loc, block.getArgument(0), one); Value incr = - builder.create(loc, block.getArgument(1), stepBy); - builder.create(loc, ValueRange{count, incr}); + arith::AddIOp::create(builder, loc, block.getArgument(1), stepBy); + cc::ContinueOp::create(builder, loc, ValueRange{count, incr}); }); loop->setAttr("invariant", builder.getUnitAttr()); return loop; @@ -508,7 +518,7 @@ static bool shouldExpand(SmallVectorImpl &packedTys, } else if (theSet.size() == 1) { packedTys[packIdx] = theSet[0]; } else { - assert(theSet[0] == FloatType::getF32(ctx) && "must be float"); + assert(theSet[0] == Float32Type::get(ctx) && "must be float"); packedTys[packIdx] = VectorType::get(ArrayRef{2}, theSet[0]); } @@ -743,9 +753,9 @@ Value factory::createCast(OpBuilder &builder, Location loc, Type toType, return fromValue; auto unit = 
UnitAttr::get(builder.getContext()); UnitAttr none; - return builder.create(loc, toType, fromValue, - signExtend ? unit : none, - zeroExtend ? unit : none); + return cudaq::cc::CastOp::create(builder, loc, toType, fromValue, + signExtend ? unit : none, + zeroExtend ? unit : none); } std::vector> @@ -796,7 +806,7 @@ factory::getOrAddFunc(mlir::Location loc, mlir::StringRef funcName, OpBuilder::InsertionGuard guard(build); build.setInsertionPointToEnd(module.getBody()); SmallVector attrs; - func = build.create(loc, funcName, funcTy, attrs); + func = func::FuncOp::create(build, loc, funcName, funcTy, attrs); func.setPrivate(); return {func, /*defined=*/false}; } diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp index c611b15a1f5..fd3857fe53d 100644 --- a/lib/Optimizer/Builder/Intrinsics.cpp +++ b/lib/Optimizer/Builder/Intrinsics.cpp @@ -75,7 +75,7 @@ static constexpr IntrinsicCode intrinsicTable[] = { )#"}, {cudaq::runtime::deviceCodeHolderAdd, {}, R"#( - llvm.func @__cudaq_deviceCodeHolderAdd(!llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"} + llvm.func @__cudaq_deviceCodeHolderAdd(!llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"} )#"}, {cudaq::runtime::getLinkableKernelKey, {}, R"#( @@ -220,7 +220,7 @@ static constexpr IntrinsicCode intrinsicTable[] = { %false = arith.constant false %to0 = cc.cast %dest : (!cc.ptr) -> !cc.ptr %from0 = cc.cast %src : (!cc.ptr>) -> !cc.ptr - call @llvm.memcpy.p0i8.p0i8.i64(%to0, %from0, %len, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () + call @llvm.memcpy.p0.p0.i64(%to0, %from0, %len, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () return } )#"}, @@ -272,11 +272,11 @@ static constexpr IntrinsicCode intrinsicTable[] = { %3 = call @malloc(%2) : (i64) -> !cc.ptr %10 = cc.cast %3 : (!cc.ptr) -> !cc.ptr> %false = arith.constant false - call @llvm.memcpy.p0i8.p0i8.i64(%3, %arg0, %arg1, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () + call @llvm.memcpy.p0.p0.i64(%3, %arg0, %arg1, 
%false) : (!cc.ptr, !cc.ptr, i64, i1) -> () %4 = cc.compute_ptr %arg2[0] : (!cc.ptr, i64}>>) -> !cc.ptr> %5 = cc.load %4 : !cc.ptr> %6 = cc.compute_ptr %10[%arg1] : (!cc.ptr>, i64) -> !cc.ptr - call @llvm.memcpy.p0i8.p0i8.i64(%6, %5, %1, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () + call @llvm.memcpy.p0.p0.i64(%6, %5, %1, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () %7 = cc.undef !cc.struct<{!cc.ptr, i64}> %8 = cc.insert_value %7[0], %3 : (!cc.struct<{!cc.ptr, i64}>, !cc.ptr) -> !cc.struct<{!cc.ptr, i64}> %9 = cc.insert_value %8[1], %2 : (!cc.struct<{!cc.ptr, i64}>, i64) -> !cc.struct<{!cc.ptr, i64}> @@ -401,7 +401,7 @@ static constexpr IntrinsicCode intrinsicTable[] = { %size = arith.muli %arg1, %arg2 : i64 %0 = call @malloc(%size) : (i64) -> !cc.ptr %false = arith.constant false - call @llvm.memcpy.p0i8.p0i8.i64(%0, %arg0, %size, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () + call @llvm.memcpy.p0.p0.i64(%0, %arg0, %size, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () return %0 : !cc.ptr } )#"}, @@ -412,7 +412,7 @@ static constexpr IntrinsicCode intrinsicTable[] = { {"__nvqpp_vectorCopyToStack", {cudaq::llvmMemCopyIntrinsic, "free"}, R"#( func.func private @__nvqpp_vectorCopyToStack(%to: !cc.ptr, %from: !cc.ptr, %size: i64) { %false = arith.constant false - call @llvm.memcpy.p0i8.p0i8.i64(%to, %from, %size, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () + call @llvm.memcpy.p0.p0.i64(%to, %from, %size, %false) : (!cc.ptr, !cc.ptr, i64, i1) -> () call @free(%from) : (!cc.ptr) -> () return })#"}, @@ -502,7 +502,7 @@ static constexpr IntrinsicCode intrinsicTable[] = { "func.func private @cudaqRegisterKernelName(!cc.ptr) -> ()"}, {cudaq::runtime::CudaqRegisterLambdaName, {}, R"#( - llvm.func @cudaqRegisterLambdaName(!llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"} + llvm.func @cudaqRegisterLambdaName(!llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"} )#"}, {"free", {}, "func.func private @free(!cc.ptr) -> ()"}, @@ -513,15 +513,15 @@ static constexpr 
IntrinsicCode intrinsicTable[] = { func.func private @hybridLaunchKernel(!cc.ptr, !cc.ptr, !cc.ptr, i64, i64, !cc.ptr) -> !cc.struct<{!cc.ptr, i64}> )#"}, - // llvm.memcpy.p0i8.p0i8.i64 + // llvm.memcpy.p0.p0.i64 {cudaq::llvmMemCopyIntrinsic, {}, R"#( - func.func private @llvm.memcpy.p0i8.p0i8.i64(!cc.ptr, !cc.ptr, i64, i1) -> () + func.func private @llvm.memcpy.p0.p0.i64(!cc.ptr, !cc.ptr, i64, i1) -> () )#"}, - {cudaq::llvmMemSetIntrinsic, // llvm.memset.p0i8.i64 + {cudaq::llvmMemSetIntrinsic, // llvm.memset.p0.i64 {}, R"#( - func.func private @llvm.memset.p0i8.i64(!cc.ptr, i8, i64, i1) -> ())#"}, + func.func private @llvm.memset.p0.i64(!cc.ptr, i8, i64, i1) -> ())#"}, // NB: load llvmStackSave to get both. {cudaq::llvmStackRestore, @@ -647,8 +647,8 @@ static constexpr IntrinsicCode intrinsicTable[] = { !qir_array = !cc.ptr !qir_qubit = !cc.ptr !qir_result = !cc.ptr - !qir_charptr = !cc.ptr - !qir_llvmptr = !llvm.ptr + !qir_charptr = !cc.ptr + !qir_llvmptr = !llvm.ptr )#"}, // Use the obsolete LLVM opaque struct type. 
{"qir_opaque_struct", {}, R"#( @@ -656,7 +656,7 @@ static constexpr IntrinsicCode intrinsicTable[] = { !qir_qubit = !cc.ptr> !qir_result = !cc.ptr> !qir_charptr = !cc.ptr - !qir_llvmptr = !llvm.ptr + !qir_llvmptr = !llvm.ptr )#"}, // streamlinedLaunchKernel(kernelName, vectorArgPtrs) @@ -697,7 +697,7 @@ LLVM::GlobalOp IRBuilder::genCStringLiteral(Location loc, ModuleOp module, auto stringAttr = getStringAttr(cstring); OpBuilder::InsertionGuard guard(*this); setInsertionPointToEnd(module.getBody()); - return create(loc, cstringTy, /*isConstant=*/true, + return LLVM::GlobalOp::create(*this, loc, cstringTy, /*isConstant=*/true, LLVM::Linkage::Private, uniqName, stringAttr, /*alignment=*/0); } @@ -825,9 +825,9 @@ static cc::GlobalOp buildVectorOfConstantElements(Location loc, ModuleOp module, builder.setInsertionPointToEnd(module.getBody()); auto globalTy = cc::ArrayType::get(ctx, eleTy, arrayAttr.size()); auto global = - builder.create(loc, globalTy, name, arrayAttr, - /*constant=*/true, - /*external=*/false); + cudaq::cc::GlobalOp::create(builder, loc, globalTy, name, arrayAttr, + /*constant=*/true, + /*external=*/false); global.setPrivate(); return global; } diff --git a/lib/Optimizer/Builder/Marshal.cpp b/lib/Optimizer/Builder/Marshal.cpp index 7c272eb3f12..9cf7a481322 100644 --- a/lib/Optimizer/Builder/Marshal.cpp +++ b/lib/Optimizer/Builder/Marshal.cpp @@ -28,21 +28,22 @@ Value genStringLength(Location loc, OpBuilder &builder, Value stringArg, if constexpr (FromQPU) { Type stringTy = stringArg.getType(); assert(isa(stringTy)); - return builder.create(loc, builder.getI64Type(), - stringArg); + return cudaq::cc::StdvecSizeOp::create(builder, loc, builder.getI64Type(), + stringArg); } else /*constexpr */ { Type stringTy = stringArg.getType(); assert(isa(stringTy) && isa( cast(stringTy).getElementType()) && "host side string expected"); - auto callArg = builder.create( - loc, cudaq::cc::PointerType::get(builder.getI8Type()), stringArg); + auto callArg = 
cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(builder.getI8Type()), + stringArg); StringRef helperName = module->getAttr(cudaq::runtime::sizeofStringAttrName) ? cudaq::runtime::getPauliWordSize : cudaq::runtime::bindingGetStringSize; - auto lenRes = builder.create(loc, builder.getI64Type(), - helperName, ValueRange{callArg}); + auto lenRes = func::CallOp::create(builder, loc, builder.getI64Type(), + helperName, ValueRange{callArg}); return lenRes.getResult(0); } } @@ -70,8 +71,8 @@ Value genVectorSize(Location loc, OpBuilder &builder, Value vecArg) { if constexpr (FromQPU) { Type vecArgTy = vecArg.getType(); assert(isa(vecArgTy)); - return builder.create(loc, builder.getI64Type(), - vecArg); + return cudaq::cc::StdvecSizeOp::create(builder, loc, builder.getI64Type(), + vecArg); } else /* constexpr */ { auto vecTy = cast(vecArg.getType()); auto vecStructTy = cast(vecTy.getElementType()); @@ -82,24 +83,26 @@ Value genVectorSize(Location loc, OpBuilder &builder, Value vecArg) { auto vecElePtrTy = cudaq::cc::PointerType::get(vecStructTy.getMember(0)); // Get the pointer to the pointer of the end of the array - Value endPtr = builder.create( - loc, vecElePtrTy, vecArg, ArrayRef{1}); + Value endPtr = + cudaq::cc::ComputePtrOp::create(builder, loc, vecElePtrTy, vecArg, + ArrayRef{1}); // Get the pointer to the pointer of the beginning of the array - Value beginPtr = builder.create( - loc, vecElePtrTy, vecArg, ArrayRef{0}); + Value beginPtr = + cudaq::cc::ComputePtrOp::create(builder, loc, vecElePtrTy, vecArg, + ArrayRef{0}); // Load to a T* - endPtr = builder.create(loc, endPtr); - beginPtr = builder.create(loc, beginPtr); + endPtr = cudaq::cc::LoadOp::create(builder, loc, endPtr); + beginPtr = cudaq::cc::LoadOp::create(builder, loc, beginPtr); // Map those pointers to integers Type i64Ty = builder.getI64Type(); - Value endInt = builder.create(loc, i64Ty, endPtr); - Value beginInt = builder.create(loc, i64Ty, beginPtr); + Value endInt = 
cudaq::cc::CastOp::create(builder, loc, i64Ty, endPtr); + Value beginInt = cudaq::cc::CastOp::create(builder, loc, i64Ty, beginPtr); // Subtracting these will give us the size in bytes. - return builder.create(loc, endInt, beginInt); + return arith::SubIOp::create(builder, loc, endInt, beginInt); } } @@ -107,11 +110,11 @@ Value cudaq::opt::marshal::genComputeReturnOffset( Location loc, OpBuilder &builder, FunctionType funcTy, cudaq::cc::StructType msgStructTy) { if (funcTy.getNumResults() == 0) - return builder.create(loc, NoResultOffset, 64); + return arith::ConstantIntOp::create(builder, loc, NoResultOffset, 64); std::int32_t numKernelArgs = funcTy.getNumInputs(); auto i64Ty = builder.getI64Type(); - return builder.create(loc, i64Ty, msgStructTy, - ArrayRef{numKernelArgs}); + return cc::OffsetOfOp::create(builder, loc, i64Ty, msgStructTy, + ArrayRef{numKernelArgs}); } void cudaq::opt::marshal::genReturnOffsetFunction( @@ -120,13 +123,13 @@ void cudaq::opt::marshal::genReturnOffsetFunction( auto *ctx = builder.getContext(); auto i64Ty = builder.getI64Type(); auto funcTy = FunctionType::get(ctx, {}, {i64Ty}); - auto returnOffsetFunc = - builder.create(loc, classNameStr + ".returnOffset", funcTy); + auto returnOffsetFunc = func::FuncOp::create( + builder, loc, classNameStr + ".returnOffset", funcTy); OpBuilder::InsertionGuard guard(builder); auto *entry = returnOffsetFunc.addEntryBlock(); builder.setInsertionPointToStart(entry); auto result = genComputeReturnOffset(loc, builder, devKernelTy, msgStructTy); - builder.create(loc, result); + func::ReturnOp::create(builder, loc, result); } static cudaq::cc::PointerType getByteAddressableType(OpBuilder &builder) { @@ -159,10 +162,10 @@ genByteSizeAndElementCount(Location loc, OpBuilder &builder, ModuleOp module, auto fTy = cast(eTy).getMember(0); auto tTy = cast(fTy).getElementType(); auto i64Ty = builder.getI64Type(); - auto eleSize = builder.create(loc, i64Ty, tTy); - Value count = builder.create(loc, size, eleSize); - 
auto ate = builder.create(loc, 8, 64); - size = builder.create(loc, count, ate); + auto eleSize = cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, tTy); + Value count = arith::DivSIOp::create(builder, loc, size, eleSize); + auto ate = arith::ConstantIntOp::create(builder, loc, 8, 64); + size = arith::MulIOp::create(builder, loc, count, ate); return {size, count}; } @@ -171,10 +174,10 @@ genByteSizeAndElementCount(Location loc, OpBuilder &builder, ModuleOp module, if (isa(eleTy)) { auto arrTy = cudaq::opt::factory::genHostStringType(module); auto words = - builder.create(loc, arrTy.getSize() / 8, 64); - size = builder.create(loc, size, words); - auto ate = builder.create(loc, 8, 64); - Value count = builder.create(loc, size, ate); + arith::ConstantIntOp::create(builder, loc, arrTy.getSize() / 8, 64); + size = arith::DivSIOp::create(builder, loc, size, words); + auto ate = arith::ConstantIntOp::create(builder, loc, 8, 64); + Value count = arith::DivSIOp::create(builder, loc, size, ate); return {size, count}; } @@ -186,11 +189,11 @@ genByteSizeAndElementCount(Location loc, OpBuilder &builder, ModuleOp module, auto vecEleTy = cast(vecEleRefTy).getElementType(); auto i64Ty = builder.getI64Type(); auto hostStrSize = - builder.create(loc, i64Ty, vecEleTy); - Value count = builder.create(loc, size, hostStrSize); + cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, vecEleTy); + Value count = arith::DivSIOp::create(builder, loc, size, hostStrSize); Type packedTy = cudaq::opt::factory::genArgumentBufferType(eleTy); - auto packSize = builder.create(loc, i64Ty, packedTy); - size = builder.create(loc, count, packSize); + auto packSize = cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, packedTy); + size = arith::MulIOp::create(builder, loc, count, packSize); return {size, count}; } return {}; @@ -255,10 +258,10 @@ convertAllStdVectorBool(Location loc, OpBuilder &builder, ModuleOp module, cudaq::opt::factory::stlVectorType(stdvecTy.getElementType()); Value tmp = 
preallocated.has_value() ? *preallocated - : builder.create(loc, stdvecHostTy); - builder.create(loc, std::nullopt, - cudaq::stdvecBoolUnpackToInitList, - ArrayRef{tmp, arg, heapTracker}); + : cudaq::cc::AllocaOp::create(builder, loc, stdvecHostTy); + func::CallOp::create(builder, loc, TypeRange{}, + cudaq::stdvecBoolUnpackToInitList, + ArrayRef{tmp, arg, heapTracker}); return {tmp, true}; } @@ -271,20 +274,21 @@ convertAllStdVectorBool(Location loc, OpBuilder &builder, ModuleOp module, auto argVecTy = cast(ptrArgTy.getElementType()); auto subVecPtrTy = cudaq::cc::PointerType::get(argVecTy.getMember(0)); // Compute the pointer to the pointer to the first T element. - auto inputRef = builder.create( - loc, subVecPtrTy, arg, ArrayRef{0}); - auto startInput = builder.create(loc, inputRef); + auto inputRef = cudaq::cc::ComputePtrOp::create( + builder, loc, subVecPtrTy, arg, ArrayRef{0}); + auto startInput = cudaq::cc::LoadOp::create(builder, loc, inputRef); auto startTy = startInput.getType(); auto subArrTy = cudaq::cc::ArrayType::get( cast(startTy).getElementType()); - auto input = builder.create( - loc, cudaq::cc::PointerType::get(subArrTy), startInput); + auto input = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(subArrTy), startInput); auto transientTy = convertToTransientType(sty, module); auto tmp = [&]() -> Value { if (preallocated) - return builder.create( - loc, cudaq::cc::PointerType::get(transientTy), *preallocated); - return builder.create(loc, transientTy); + return cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(transientTy), + *preallocated); + return cudaq::cc::AllocaOp::create(builder, loc, transientTy); }(); Value sizeDelta = genVectorSize(loc, builder, arg); auto count = [&]() -> Value { @@ -293,39 +297,39 @@ convertAllStdVectorBool(Location loc, OpBuilder &builder, ModuleOp module, sizeDelta, arg, sty); return p.second; } - auto sizeEle = builder.create( - loc, builder.getI64Type(), seleTy); - return 
builder.create(loc, sizeDelta, sizeEle); + auto sizeEle = cudaq::cc::SizeOfOp::create(builder, loc, + builder.getI64Type(), seleTy); + return arith::DivSIOp::create(builder, loc, sizeDelta, sizeEle); }(); auto transEleTy = cast(transientTy).getMember(0); auto dataTy = cast(transEleTy).getElementType(); auto sizeTransientTy = - builder.create(loc, builder.getI64Type(), dataTy); + cudaq::cc::SizeOfOp::create(builder, loc, builder.getI64Type(), dataTy); Value sizeInBytes = - builder.create(loc, count, sizeTransientTy); + arith::MulIOp::create(builder, loc, count, sizeTransientTy); // Create a new vector that we'll store the converted data into. - Value byteBuffer = builder.create( - loc, builder.getI8Type(), sizeInBytes); + Value byteBuffer = cudaq::cc::AllocaOp::create( + builder, loc, builder.getI8Type(), sizeInBytes); // Initialize the temporary vector. auto vecEleTy = cudaq::cc::PointerType::get(transEleTy); - auto tmpBegin = builder.create( - loc, vecEleTy, tmp, ArrayRef{0}); + auto tmpBegin = cudaq::cc::ComputePtrOp::create( + builder, loc, vecEleTy, tmp, ArrayRef{0}); auto bufferBegin = - builder.create(loc, transEleTy, byteBuffer); - builder.create(loc, bufferBegin, tmpBegin); - auto tmpEnd = builder.create( - loc, vecEleTy, tmp, ArrayRef{1}); - auto byteBufferEnd = builder.create( - loc, cudaq::cc::PointerType::get(builder.getI8Type()), byteBuffer, - ArrayRef{sizeInBytes}); + cudaq::cc::CastOp::create(builder, loc, transEleTy, byteBuffer); + cudaq::cc::StoreOp::create(builder, loc, bufferBegin, tmpBegin); + auto tmpEnd = cudaq::cc::ComputePtrOp::create( + builder, loc, vecEleTy, tmp, ArrayRef{1}); + auto byteBufferEnd = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(builder.getI8Type()), + byteBuffer, ArrayRef{sizeInBytes}); auto bufferEnd = - builder.create(loc, transEleTy, byteBufferEnd); - builder.create(loc, bufferEnd, tmpEnd); - auto tmpEnd2 = builder.create( - loc, vecEleTy, tmp, ArrayRef{2}); - builder.create(loc, 
bufferEnd, tmpEnd2); + cudaq::cc::CastOp::create(builder, loc, transEleTy, byteBufferEnd); + cudaq::cc::StoreOp::create(builder, loc, bufferEnd, tmpEnd); + auto tmpEnd2 = cudaq::cc::ComputePtrOp::create( + builder, loc, vecEleTy, tmp, ArrayRef{2}); + cudaq::cc::StoreOp::create(builder, loc, bufferEnd, tmpEnd2); // Loop over each element in the outer vector and initialize it to the inner // vector value. The data may be heap allocated.) @@ -333,16 +337,17 @@ convertAllStdVectorBool(Location loc, OpBuilder &builder, ModuleOp module, auto transientBufferTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(transientEleTy)); auto buffer = - builder.create(loc, transientBufferTy, byteBuffer); + cudaq::cc::CastOp::create(builder, loc, transientBufferTy, byteBuffer); cudaq::opt::factory::createInvariantLoop( builder, loc, count, [&](OpBuilder &builder, Location loc, Region &, Block &block) { Value i = block.getArgument(0); - Value inp = builder.create( - loc, startTy, input, ArrayRef{i}); - auto currentVector = builder.create( - loc, cudaq::cc::PointerType::get(transientEleTy), buffer, + Value inp = cudaq::cc::ComputePtrOp::create( + builder, loc, startTy, input, + ArrayRef{i}); + auto currentVector = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(transientEleTy), buffer, ArrayRef{i}); convertAllStdVectorBool(loc, builder, module, inp, seleTy, heapTracker, currentVector); @@ -360,21 +365,21 @@ convertAllStdVectorBool(Location loc, OpBuilder &builder, ModuleOp module, // we'll store the converted data into. auto buffer = [&]() -> Value { if (preallocated) - return builder.create( - loc, cudaq::cc::PointerType::get(bufferTy), *preallocated); - return builder.create(loc, bufferTy); + return cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(bufferTy), *preallocated); + return cudaq::cc::AllocaOp::create(builder, loc, bufferTy); }(); // Loop over each element. Replace each with the converted value. 
for (auto iter : llvm::enumerate(sty.getMembers())) { std::int32_t i = iter.index(); Type memTy = iter.value(); - auto fromPtr = builder.create( - loc, cudaq::cc::PointerType::get(argStrTy.getMember(i)), arg, + auto fromPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(argStrTy.getMember(i)), arg, ArrayRef{i}); auto transientTy = convertToTransientType(memTy, module); - Value toPtr = builder.create( - loc, cudaq::cc::PointerType::get(transientTy), buffer, + Value toPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(transientTy), buffer, ArrayRef{i}); convertAllStdVectorBool(loc, builder, module, fromPtr, memTy, heapTracker, toPtr); @@ -425,30 +430,30 @@ Value descendThroughDynamicType(Location loc, OpBuilder &builder, // type, so walk over the vector and recurse on each element. // `size` is already the proper size of the lengths of each of the // elements in turn. - builder.create(loc, size, tmp); + cudaq::cc::StoreOp::create(builder, loc, size, tmp); auto ptrTy = cast(arg.getType()); auto strTy = cast(ptrTy.getElementType()); auto memTy = cast(strTy.getMember(0)); auto arrTy = cudaq::cc::PointerType::get(cudaq::cc::PointerType::get( cudaq::cc::ArrayType::get(memTy.getElementType()))); - auto castPtr = builder.create(loc, arrTy, arg); - auto castArg = builder.create(loc, castPtr); + auto castPtr = cudaq::cc::CastOp::create(builder, loc, arrTy, arg); + auto castArg = cudaq::cc::LoadOp::create(builder, loc, castPtr); auto castPtrTy = cudaq::cc::PointerType::get(memTy.getElementType()); cudaq::opt::factory::createInvariantLoop( builder, loc, count, [&](OpBuilder &builder, Location loc, Region &, Block &block) { Value i = block.getArgument(0); - auto ai = builder.create( - loc, castPtrTy, castArg, + auto ai = cudaq::cc::ComputePtrOp::create( + builder, loc, castPtrTy, castArg, ArrayRef{i}); - auto tmpVal = builder.create(loc, tmp); + auto tmpVal = cudaq::cc::LoadOp::create(builder, loc, tmp); Value 
innerSize = descendThroughDynamicType( loc, builder, module, eleTy, tmpVal, ai, tmp); - builder.create(loc, innerSize, tmp); + cudaq::cc::StoreOp::create(builder, loc, innerSize, tmp); }); - return builder.create(loc, tmp); + return cudaq::cc::LoadOp::create(builder, loc, tmp); }) // A struct can be dynamic if it contains dynamic members. Get the // static portion of the struct first, which will have length slots. @@ -457,7 +462,7 @@ Value descendThroughDynamicType(Location loc, OpBuilder &builder, if (cudaq::cc::isDynamicType(t)) { Type packedTy = cudaq::opt::factory::genArgumentBufferType(t); Value strSize = - builder.create(loc, i64Ty, packedTy); + cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, packedTy); for (auto iter : llvm::enumerate(t.getMembers())) { std::int32_t i = iter.index(); auto m = iter.value(); @@ -466,20 +471,21 @@ Value descendThroughDynamicType(Location loc, OpBuilder &builder, auto hostStrTy = cast(hostPtrTy.getElementType()); auto pm = cudaq::cc::PointerType::get(hostStrTy.getMember(i)); - auto ai = builder.create( - loc, pm, arg, ArrayRef{i}); + auto ai = cudaq::cc::ComputePtrOp::create( + builder, loc, pm, arg, + ArrayRef{i}); strSize = descendThroughDynamicType( loc, builder, module, m, strSize, ai, tmp); } } return strSize; } - return builder.create(loc, i64Ty, t); + return cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, t); }) .Default([&](Type t) -> Value { - return builder.create(loc, i64Ty, t); + return cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, t); }); - return builder.create(loc, tySize, addend); + return arith::AddIOp::create(builder, loc, tySize, addend); } template @@ -488,7 +494,7 @@ Value genSizeOfDynamicMessageBufferImpl( cudaq::cc::StructType structTy, ArrayRef> zippy, Value tmp) { auto i64Ty = builder.getI64Type(); - Value initSize = builder.create(loc, i64Ty, structTy); + Value initSize = cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, structTy); for (auto [_, a, t] : zippy) if (cudaq::cc::isDynamicType(t)) 
initSize = descendThroughDynamicType(loc, builder, module, t, @@ -516,28 +522,29 @@ template Value populateStringAddendum(Location loc, OpBuilder &builder, Value host, Value sizeSlot, Value addendum, ModuleOp module) { Value size = genStringLength(loc, builder, host, module); - builder.create(loc, size, sizeSlot); + cudaq::cc::StoreOp::create(builder, loc, size, sizeSlot); auto ptrI8Ty = cudaq::cc::PointerType::get(builder.getI8Type()); Value dataPtr; if constexpr (FromQPU) { - dataPtr = builder.create(loc, ptrI8Ty, host); + dataPtr = cudaq::cc::StdvecDataOp::create(builder, loc, ptrI8Ty, host); } else /*constexpr*/ { - auto fromPtr = builder.create(loc, ptrI8Ty, host); + auto fromPtr = cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, host); StringRef helperName = module->getAttr(cudaq::runtime::sizeofStringAttrName) ? cudaq::runtime::getPauliWordData : cudaq::runtime::bindingGetStringData; - auto call = builder.create(loc, ptrI8Ty, helperName, - ValueRange{fromPtr}); + auto call = func::CallOp::create(builder, loc, ptrI8Ty, helperName, + ValueRange{fromPtr}); dataPtr = call.getResult(0); } - auto notVolatile = builder.create(loc, 0, 1); - auto toPtr = builder.create(loc, ptrI8Ty, addendum); - builder.create(loc, std::nullopt, cudaq::llvmMemCopyIntrinsic, - ValueRange{toPtr, dataPtr, size, notVolatile}); + auto notVolatile = arith::ConstantIntOp::create(builder, loc, 0, 1); + auto toPtr = cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, addendum); + func::CallOp::create(builder, loc, TypeRange{}, cudaq::llvmMemCopyIntrinsic, + ValueRange{toPtr, dataPtr, size, notVolatile}); auto ptrI8Arr = getByteAddressableType(builder); - auto addBytes = builder.create(loc, ptrI8Arr, addendum); - return builder.create( - loc, ptrI8Ty, addBytes, ArrayRef{size}); + auto addBytes = cudaq::cc::CastOp::create(builder, loc, ptrI8Arr, addendum); + return cudaq::cc::ComputePtrOp::create( + builder, loc, ptrI8Ty, addBytes, + ArrayRef{size}); } // Simple case when the vector data is known 
to not hold dynamic data. @@ -545,7 +552,7 @@ template Value populateVectorAddendum(Location loc, OpBuilder &builder, Value host, Value sizeSlot, Value addendum) { Value size = genVectorSize(loc, builder, host); - builder.create(loc, size, sizeSlot); + cudaq::cc::StoreOp::create(builder, loc, size, sizeSlot); auto ptrI8Ty = cudaq::cc::PointerType::get(builder.getI8Type()); auto ptrPtrI8 = cudaq::opt::marshal::getPointerToPointerType(builder); Value dataPtr = [&]() -> Value { @@ -553,21 +560,22 @@ Value populateVectorAddendum(Location loc, OpBuilder &builder, Value host, auto eleTy = cast(host.getType()).getElementType(); auto ptrTy = cudaq::cc::PointerType::get(eleTy); auto vecDataPtr = - builder.create(loc, ptrTy, host); - return builder.create(loc, ptrI8Ty, vecDataPtr); + cudaq::cc::StdvecDataOp::create(builder, loc, ptrTy, host); + return cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, vecDataPtr); } else /*constexpr*/ { - auto fromPtrPtr = builder.create(loc, ptrPtrI8, host); - return builder.create(loc, fromPtrPtr); + auto fromPtrPtr = cudaq::cc::CastOp::create(builder, loc, ptrPtrI8, host); + return cudaq::cc::LoadOp::create(builder, loc, fromPtrPtr); } }(); - auto notVolatile = builder.create(loc, 0, 1); - auto toPtr = builder.create(loc, ptrI8Ty, addendum); - builder.create(loc, std::nullopt, cudaq::llvmMemCopyIntrinsic, - ValueRange{toPtr, dataPtr, size, notVolatile}); + auto notVolatile = arith::ConstantIntOp::create(builder, loc, 0, 1); + auto toPtr = cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, addendum); + func::CallOp::create(builder, loc, TypeRange{}, cudaq::llvmMemCopyIntrinsic, + ValueRange{toPtr, dataPtr, size, notVolatile}); auto ptrI8Arr = getByteAddressableType(builder); - auto addBytes = builder.create(loc, ptrI8Arr, addendum); - return builder.create( - loc, ptrI8Ty, addBytes, ArrayRef{size}); + auto addBytes = cudaq::cc::CastOp::create(builder, loc, ptrI8Arr, addendum); + return cudaq::cc::ComputePtrOp::create( + builder, loc, ptrI8Ty, 
addBytes, + ArrayRef{size}); } template @@ -585,16 +593,16 @@ Value populateDynamicAddendum(Location loc, OpBuilder &builder, ModuleOp module, auto [bytes, count] = genByteSizeAndElementCount( loc, builder, module, eleTy, size, host, devArgTy); size = bytes; - builder.create(loc, size, sizeSlot); + cudaq::cc::StoreOp::create(builder, loc, size, sizeSlot); // Convert from bytes to vector length in elements. // Compute new addendum start. auto addrTy = getByteAddressableType(builder); - auto castEnd = builder.create(loc, addrTy, addendum); - Value newAddendum = builder.create( - loc, addendum.getType(), castEnd, + auto castEnd = cudaq::cc::CastOp::create(builder, loc, addrTy, addendum); + Value newAddendum = cudaq::cc::ComputePtrOp::create( + builder, loc, addendum.getType(), castEnd, ArrayRef{size}); - builder.create(loc, newAddendum, addendumScratch); + cudaq::cc::StoreOp::create(builder, loc, newAddendum, addendumScratch); Type dataTy = cudaq::opt::factory::genArgumentBufferType(eleTy); auto arrDataTy = cudaq::cc::ArrayType::get(dataTy); auto sizeBlockTy = cudaq::cc::PointerType::get(arrDataTy); @@ -605,7 +613,7 @@ Value populateDynamicAddendum(Location loc, OpBuilder &builder, ModuleOp module, // and expressed in bytes. Each size will be the size of the span of the // element (or its subfields) at that offset. auto sizeBlock = - builder.create(loc, sizeBlockTy, addendum); + cudaq::cc::CastOp::create(builder, loc, sizeBlockTy, addendum); auto hostEleTy = cast(host.getType()).getElementType(); auto ptrPtrBlockTy = cudaq::cc::PointerType::get( @@ -615,14 +623,15 @@ Value populateDynamicAddendum(Location loc, OpBuilder &builder, ModuleOp module, // "front" out of the vector (the first pointer in the triple) and step // over the contiguous range of vectors in the host block. The vector of // vectors forms a ragged array structure in host memory. 
- auto hostBeginPtrRef = builder.create( - loc, ptrPtrBlockTy, host, ArrayRef{0}); - auto hostBegin = builder.create(loc, hostBeginPtrRef); + auto hostBeginPtrRef = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrPtrBlockTy, host, + ArrayRef{0}); + auto hostBegin = cudaq::cc::LoadOp::create(builder, loc, hostBeginPtrRef); auto hostBeginEleTy = cast(hostBegin.getType()); auto hostBlockTy = cudaq::cc::PointerType::get( cudaq::cc::ArrayType::get(hostBeginEleTy.getElementType())); auto hostBlock = - builder.create(loc, hostBlockTy, hostBegin); + cudaq::cc::CastOp::create(builder, loc, hostBlockTy, hostBegin); // Loop over each vector element in the vector (recursively). cudaq::opt::factory::createInvariantLoop( @@ -630,19 +639,19 @@ Value populateDynamicAddendum(Location loc, OpBuilder &builder, ModuleOp module, [&](OpBuilder &builder, Location loc, Region &, Block &block) { Value i = block.getArgument(0); Value addm = - builder.create(loc, addendumScratch); - auto subSlot = builder.create( - loc, ptrDataTy, sizeBlock, + cudaq::cc::LoadOp::create(builder, loc, addendumScratch); + auto subSlot = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrDataTy, sizeBlock, ArrayRef{i}); - auto subHost = builder.create( - loc, hostBeginEleTy, hostBlock, + auto subHost = cudaq::cc::ComputePtrOp::create( + builder, loc, hostBeginEleTy, hostBlock, ArrayRef{i}); Value newAddm = populateDynamicAddendum( loc, builder, module, eleTy, subHost, subSlot, addm, addendumScratch); - builder.create(loc, newAddm, addendumScratch); + cudaq::cc::StoreOp::create(builder, loc, newAddm, addendumScratch); }); - return builder.create(loc, addendumScratch); + return cudaq::cc::LoadOp::create(builder, loc, addendumScratch); } return populateVectorAddendum(loc, builder, host, sizeSlot, addendum); @@ -656,23 +665,23 @@ Value populateDynamicAddendum(Location loc, OpBuilder &builder, ModuleOp module, auto hostPtrTy = cast(host.getType()); auto hostMemTy = cast(hostPtrTy.getElementType()) 
.getMember(iterIdx); - auto val = builder.create( - loc, cudaq::cc::PointerType::get(hostMemTy), host, + auto val = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(hostMemTy), host, ArrayRef{iterIdx}); Type iterTy = iter.value(); if (cudaq::cc::isDynamicType(iterTy)) { - Value fieldInSlot = builder.create( - loc, cudaq::cc::PointerType::get(builder.getI64Type()), sizeSlot, - ArrayRef{iterIdx}); + Value fieldInSlot = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(builder.getI64Type()), + sizeSlot, ArrayRef{iterIdx}); addendum = populateDynamicAddendum(loc, builder, module, iterTy, val, fieldInSlot, addendum, addendumScratch); } else { - Value fieldInSlot = builder.create( - loc, cudaq::cc::PointerType::get(iterTy), sizeSlot, + Value fieldInSlot = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(iterTy), sizeSlot, ArrayRef{iterIdx}); - auto v = builder.create(loc, val); - builder.create(loc, v, fieldInSlot); + auto v = cudaq::cc::LoadOp::create(builder, loc, val); + cudaq::cc::StoreOp::create(builder, loc, v, fieldInSlot); } } return addendum; @@ -693,8 +702,9 @@ void populateMessageBufferImpl( // Get the address of the slot to be filled. auto memberTy = cast(structTy).getMember(i); auto ptrTy = cudaq::cc::PointerType::get(memberTy); - auto slot = builder.create( - loc, ptrTy, msgBufferBase, ArrayRef{i}); + auto slot = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrTy, msgBufferBase, + ArrayRef{i}); addendum = populateDynamicAddendum( loc, builder, module, devArgTy, arg, slot, addendum, addendumScratch); continue; @@ -711,8 +721,9 @@ void populateMessageBufferImpl( // Get the address of the slot to be filled. 
auto memberTy = cast(structTy).getMember(i); auto ptrTy = cudaq::cc::PointerType::get(memberTy); - Value slot = builder.create( - loc, ptrTy, msgBufferBase, ArrayRef{i}); + Value slot = + cudaq::cc::ComputePtrOp::create(builder, loc, ptrTy, msgBufferBase, + ArrayRef{i}); // Argument is a packaged kernel. In this case, the argument is some // unknown kernel that may be called. The packaged argument is coming @@ -721,9 +732,10 @@ void populateMessageBufferImpl( // launch kernel. if (isa(devArgTy)) { auto i64Ty = builder.getI64Type(); - auto kernKey = builder.create( - loc, i64Ty, cudaq::runtime::getLinkableKernelKey, ValueRange{arg}); - builder.create(loc, kernKey.getResult(0), slot); + auto kernKey = func::CallOp::create(builder, loc, i64Ty, + cudaq::runtime::getLinkableKernelKey, + ValueRange{arg}); + cudaq::cc::StoreOp::create(builder, loc, kernKey.getResult(0), slot); continue; } @@ -732,14 +744,14 @@ void populateMessageBufferImpl( // is a simulation and things are in the same address space, we pass the // pointer for convenience. 
if (isa(devArgTy)) - arg = builder.create(loc, memberTy, arg); + arg = cudaq::cc::CastOp::create(builder, loc, memberTy, arg); if (isa(arg.getType()) && (cudaq::cc::PointerType::get(arg.getType()) != slot.getType())) { - slot = builder.create( - loc, cudaq::cc::PointerType::get(arg.getType()), slot); + slot = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(arg.getType()), slot); } - builder.create(loc, arg, slot); + cudaq::cc::StoreOp::create(builder, loc, arg, slot); } } @@ -805,10 +817,10 @@ void cudaq::opt::marshal::genStdvecBoolFromInitList(Location loc, Value sret, Value data, Value size) { auto ptrTy = cc::PointerType::get(builder.getContext()); - auto castData = builder.create(loc, ptrTy, data); - auto castSret = builder.create(loc, ptrTy, sret); - builder.create(loc, std::nullopt, stdvecBoolCtorFromInitList, - ArrayRef{castSret, castData, size}); + auto castData = cc::CastOp::create(builder, loc, ptrTy, data); + auto castSret = cc::CastOp::create(builder, loc, ptrTy, sret); + func::CallOp::create(builder, loc, TypeRange{}, stdvecBoolCtorFromInitList, + ArrayRef{castSret, castData, size}); } void cudaq::opt::marshal::genStdvecTFromInitList(Location loc, @@ -818,59 +830,58 @@ void cudaq::opt::marshal::genStdvecTFromInitList(Location loc, auto i8Ty = builder.getI8Type(); auto stlVectorTy = cc::PointerType::get(opt::factory::stlVectorType(i8Ty)); auto ptrTy = cc::PointerType::get(i8Ty); - auto castSret = builder.create(loc, stlVectorTy, sret); + auto castSret = cc::CastOp::create(builder, loc, stlVectorTy, sret); auto ptrPtrTy = cc::PointerType::get(ptrTy); - auto sret0 = builder.create( - loc, ptrPtrTy, castSret, SmallVector{0}); + auto sret0 = cc::ComputePtrOp::create(builder, loc, ptrPtrTy, castSret, + SmallVector{0}); auto arrI8Ty = cc::ArrayType::get(i8Ty); auto ptrArrTy = cc::PointerType::get(arrI8Ty); - auto buffPtr0 = builder.create(loc, ptrTy, data); - builder.create(loc, buffPtr0, sret0); - auto sret1 = builder.create( - loc, 
ptrPtrTy, castSret, SmallVector{1}); - Value byteLen = builder.create(loc, tSize, vecSize); - auto buffPtr = builder.create(loc, ptrArrTy, data); - auto endPtr = builder.create( - loc, ptrTy, buffPtr, SmallVector{byteLen}); - builder.create(loc, endPtr, sret1); - auto sret2 = builder.create( - loc, ptrPtrTy, castSret, SmallVector{2}); - builder.create(loc, endPtr, sret2); + auto buffPtr0 = cc::CastOp::create(builder, loc, ptrTy, data); + cc::StoreOp::create(builder, loc, buffPtr0, sret0); + auto sret1 = cc::ComputePtrOp::create(builder, loc, ptrPtrTy, castSret, + SmallVector{1}); + Value byteLen = arith::MulIOp::create(builder, loc, tSize, vecSize); + auto buffPtr = cc::CastOp::create(builder, loc, ptrArrTy, data); + auto endPtr = cc::ComputePtrOp::create( + builder, loc, ptrTy, buffPtr, SmallVector{byteLen}); + cc::StoreOp::create(builder, loc, endPtr, sret1); + auto sret2 = cc::ComputePtrOp::create(builder, loc, ptrPtrTy, castSret, + SmallVector{2}); + cc::StoreOp::create(builder, loc, endPtr, sret2); } Value cudaq::opt::marshal::createEmptyHeapTracker(Location loc, OpBuilder &builder) { auto ptrI8Ty = cc::PointerType::get(builder.getI8Type()); - auto result = builder.create(loc, ptrI8Ty); - auto zero = builder.create(loc, 0, 64); - auto null = builder.create(loc, ptrI8Ty, zero); - builder.create(loc, null, result); + auto result = cc::AllocaOp::create(builder, loc, ptrI8Ty); + auto zero = arith::ConstantIntOp::create(builder, loc, 0, 64); + auto null = cc::CastOp::create(builder, loc, ptrI8Ty, zero); + cc::StoreOp::create(builder, loc, null, result); return result; } void cudaq::opt::marshal::maybeFreeHeapAllocations(Location loc, OpBuilder &builder, Value heapTracker) { - auto head = builder.create(loc, heapTracker); - auto zero = builder.create(loc, 0, 64); - auto headAsInt = builder.create(loc, builder.getI64Type(), head); - auto cmp = builder.create(loc, arith::CmpIPredicate::ne, - headAsInt, zero); + auto head = cc::LoadOp::create(builder, loc, 
heapTracker); + auto zero = arith::ConstantIntOp::create(builder, loc, 0, 64); + auto headAsInt = cc::CastOp::create(builder, loc, builder.getI64Type(), head); + auto cmp = arith::CmpIOp::create(builder, loc, arith::CmpIPredicate::ne, + headAsInt, zero); // If there are no std::vector to unpack, then the heapTracker will be // set to `nullptr` and otherwise unused. That will allow the compiler to DCE // this call after constant propagation. - builder.create( - loc, TypeRange{}, cmp, - [&](OpBuilder &builder, Location loc, Region ®ion) { - region.push_back(new Block()); - auto &body = region.front(); - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToStart(&body); - builder.create(loc, std::nullopt, - stdvecBoolFreeTemporaryLists, - ArrayRef{head}); - builder.create(loc); - }); + cc::IfOp::create(builder, loc, TypeRange{}, cmp, + [&](OpBuilder &builder, Location loc, Region ®ion) { + region.push_back(new Block()); + auto &body = region.front(); + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(&body); + func::CallOp::create(builder, loc, TypeRange{}, + stdvecBoolFreeTemporaryLists, + ArrayRef{head}); + cc::ContinueOp::create(builder, loc); + }); } /// Fetch an argument from the comm buffer. Here, the argument is not dynamic so @@ -881,33 +892,33 @@ Value fetchInputValue(Location loc, OpBuilder &builder, Type devTy, Value ptr) { if (isa(devTy)) { // An indirect callable passes a key value which will be used to determine // the kernel that is being called. - auto key = builder.create(loc, ptr); - return builder.create(loc, devTy, key); + auto key = cudaq::cc::LoadOp::create(builder, loc, ptr); + return cudaq::cc::CastOp::create(builder, loc, devTy, key); } if (isa(devTy)) { // A direct callable will have already been effectively inlined and this // argument should not be referenced. 
- return builder.create(loc, devTy); + return cudaq::cc::PoisonOp::create(builder, loc, devTy); } auto ptrDevTy = cudaq::cc::PointerType::get(devTy); if (auto strTy = dyn_cast(devTy)) { // Argument is a struct. if (strTy.isEmpty()) - return builder.create(loc, devTy); + return cudaq::cc::UndefOp::create(builder, loc, devTy); // Cast to avoid conflicts between layout compatible, distinct struct types. - auto structPtr = builder.create(loc, ptrDevTy, ptr); + auto structPtr = cudaq::cc::CastOp::create(builder, loc, ptrDevTy, ptr); if constexpr (FromQPU) { return structPtr; } else { - return builder.create(loc, structPtr); + return cudaq::cc::LoadOp::create(builder, loc, structPtr); } } // Default case: argument passed as a value inplace. - return builder.create(loc, ptr); + return cudaq::cc::LoadOp::create(builder, loc, ptr); } /// Helper routine to generate code to increment the trailing data pointer to @@ -916,10 +927,12 @@ static Value incrementTrailingDataPointer(Location loc, OpBuilder &builder, Value trailingData, Value bytes) { auto i8Ty = builder.getI8Type(); auto bufferTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(i8Ty)); - auto buffPtr = builder.create(loc, bufferTy, trailingData); + auto buffPtr = + cudaq::cc::CastOp::create(builder, loc, bufferTy, trailingData); auto i8PtrTy = cudaq::cc::PointerType::get(i8Ty); - return builder.create( - loc, i8PtrTy, buffPtr, ArrayRef{bytes}); + return cudaq::cc::ComputePtrOp::create( + builder, loc, i8PtrTy, buffPtr, + ArrayRef{bytes}); } /// In the thunk, we need to unpack any `std::vector` objects encoded in the @@ -957,11 +970,11 @@ constructDynamicInputValue(Location loc, OpBuilder &builder, Type devTy, if (auto charSpanTy = dyn_cast(devTy)) { // From host, so construct the stdvec span with it. 
auto eleTy = charSpanTy.getElementType(); - auto castTrailingData = builder.create( - loc, cudaq::cc::PointerType::get(eleTy), trailingData); - Value vecLength = builder.create(loc, ptr); - auto result = builder.create( - loc, charSpanTy, castTrailingData, vecLength); + auto castTrailingData = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(eleTy), trailingData); + Value vecLength = cudaq::cc::LoadOp::create(builder, loc, ptr); + auto result = cudaq::cc::StdvecInitOp::create( + builder, loc, charSpanTy, castTrailingData, vecLength); auto nextTrailingData = incrementTrailingDataPointer(loc, builder, trailingData, vecLength); return {result, nextTrailingData}; @@ -979,9 +992,9 @@ constructDynamicInputValue(Location loc, OpBuilder &builder, Type devTy, // Get the size of each element in the vector and compute the vector's // logical length. - auto eleSize = builder.create(loc, i64Ty, buffEleTy); - Value bytes = builder.create(loc, ptr); - auto vecLength = builder.create(loc, bytes, eleSize); + auto eleSize = cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, buffEleTy); + Value bytes = cudaq::cc::LoadOp::create(builder, loc, ptr); + auto vecLength = arith::DivSIOp::create(builder, loc, bytes, eleSize); if (cudaq::cc::isDynamicType(eleTy)) { // The vector is recursively dynamic. @@ -998,7 +1011,7 @@ constructDynamicInputValue(Location loc, OpBuilder &builder, Type devTy, } }(); Value newVecData = - builder.create(loc, toTy, vecLength); + cudaq::cc::AllocaOp::create(builder, loc, toTy, vecLength); // Compute new trailing data, skipping the current vector's data. 
auto nextTrailingData = incrementTrailingDataPointer(loc, builder, trailingData, bytes); @@ -1011,34 +1024,34 @@ constructDynamicInputValue(Location loc, OpBuilder &builder, Type devTy, cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(packTy)); Type packedEleTy = cudaq::cc::PointerType::get(packTy); auto arrPtr = - builder.create(loc, packedArrTy, trailingData); + cudaq::cc::CastOp::create(builder, loc, packedArrTy, trailingData); auto trailingDataVar = - builder.create(loc, nextTrailingData.getType()); - builder.create(loc, nextTrailingData, - trailingDataVar); + cudaq::cc::AllocaOp::create(builder, loc, nextTrailingData.getType()); + cudaq::cc::StoreOp::create(builder, loc, nextTrailingData, + trailingDataVar); cudaq::opt::factory::createInvariantLoop( builder, loc, vecLength, [&](OpBuilder &builder, Location loc, Region &, Block &block) { Value i = block.getArgument(0); auto nextTrailingData = - builder.create(loc, trailingDataVar); - auto vecMemPtr = builder.create( - loc, packedEleTy, arrPtr, + cudaq::cc::LoadOp::create(builder, loc, trailingDataVar); + auto vecMemPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, packedEleTy, arrPtr, ArrayRef{i}); auto r = constructDynamicInputValue( loc, builder, eleTy, vecMemPtr, nextTrailingData); - auto newVecPtr = builder.create( - loc, elePtrTy, newVecData, + auto newVecPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, elePtrTy, newVecData, ArrayRef{i}); - builder.create(loc, r.first, newVecPtr); - builder.create(loc, r.second, trailingDataVar); + cudaq::cc::StoreOp::create(builder, loc, r.first, newVecPtr); + cudaq::cc::StoreOp::create(builder, loc, r.second, trailingDataVar); }); // Create the new outer stdvec span as the result. 
- Value stdvecResult = builder.create( - loc, spanTy, newVecData, vecLength); + Value stdvecResult = cudaq::cc::StdvecInitOp::create( + builder, loc, spanTy, newVecData, vecLength); nextTrailingData = - builder.create(loc, trailingDataVar); + cudaq::cc::LoadOp::create(builder, loc, trailingDataVar); return {stdvecResult, nextTrailingData}; } @@ -1050,28 +1063,28 @@ constructDynamicInputValue(Location loc, OpBuilder &builder, Type devTy, auto *ctx = builder.getContext(); auto vecTy = cudaq::cc::StructType::get(ctx, ArrayRef{ptrTy, ptrTy, ptrTy}); - Value vecVar = builder.create(loc, vecTy); + Value vecVar = cudaq::cc::UndefOp::create(builder, loc, vecTy); Value castData = - builder.create(loc, ptrTy, trailingData); - vecVar = builder.create(loc, vecTy, vecVar, - castData, 0); + cudaq::cc::CastOp::create(builder, loc, ptrTy, trailingData); + vecVar = cudaq::cc::InsertValueOp::create(builder, loc, vecTy, vecVar, + castData, 0); auto ptrArrTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); auto castTrailingData = - builder.create(loc, ptrArrTy, trailingData); - Value castEnd = builder.create( - loc, ptrTy, castTrailingData, + cudaq::cc::CastOp::create(builder, loc, ptrArrTy, trailingData); + Value castEnd = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrTy, castTrailingData, ArrayRef{bytes}); - vecVar = builder.create(loc, vecTy, vecVar, - castEnd, 1); - result = builder.create(loc, vecTy, vecVar, - castEnd, 2); + vecVar = cudaq::cc::InsertValueOp::create(builder, loc, vecTy, vecVar, + castEnd, 1); + result = cudaq::cc::InsertValueOp::create(builder, loc, vecTy, vecVar, + castEnd, 2); } else /*constexpr*/ { // From host, so construct the stdvec span with it. 
- auto castTrailingData = builder.create( - loc, cudaq::cc::PointerType::get(eleTy), trailingData); - result = builder.create( - loc, spanTy, castTrailingData, vecLength); + auto castTrailingData = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(eleTy), trailingData); + result = cudaq::cc::StdvecInitOp::create(builder, loc, spanTy, + castTrailingData, vecLength); } auto nextTrailingData = incrementTrailingDataPointer(loc, builder, trailingData, bytes); @@ -1086,27 +1099,27 @@ constructDynamicInputValue(Location loc, OpBuilder &builder, Type devTy, auto strTy = cast(devTy); auto ptrEleTy = cast(ptr.getType()).getElementType(); auto packedTy = cast(ptrEleTy); - Value result = builder.create(loc, strTy); + Value result = cudaq::cc::UndefOp::create(builder, loc, strTy); assert(strTy.getNumMembers() == packedTy.getNumMembers()); for (auto iter : llvm::enumerate(llvm::zip(strTy.getMembers(), packedTy.getMembers()))) { auto devMemTy = std::get<0>(iter.value()); std::int32_t off = iter.index(); auto packedMemTy = std::get<1>(iter.value()); - auto dataPtr = builder.create( - loc, cudaq::cc::PointerType::get(packedMemTy), ptr, + auto dataPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(packedMemTy), ptr, ArrayRef{off}); if (cudaq::cc::isDynamicType(devMemTy)) { auto r = constructDynamicInputValue(loc, builder, devMemTy, dataPtr, trailingData); - result = builder.create(loc, strTy, result, - r.first, off); + result = cudaq::cc::InsertValueOp::create(builder, loc, strTy, result, + r.first, off); trailingData = r.second; continue; } auto val = fetchInputValue(loc, builder, devMemTy, dataPtr); result = - builder.create(loc, strTy, result, val, off); + cudaq::cc::InsertValueOp::create(builder, loc, strTy, result, val, off); } return {result, trailingData}; } @@ -1116,8 +1129,8 @@ std::pair processInputValueImpl(Location loc, OpBuilder &builder, Value trailingData, Value ptrPackedStruct, Type inTy, std::int32_t off, 
cudaq::cc::StructType packedStructTy) { - auto packedPtr = builder.create( - loc, cudaq::cc::PointerType::get(packedStructTy.getMember(off)), + auto packedPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(packedStructTy.getMember(off)), ptrPackedStruct, ArrayRef{off}); if (cudaq::cc::isDynamicType(inTy)) { if constexpr (FromQPU) { @@ -1125,24 +1138,24 @@ processInputValueImpl(Location loc, OpBuilder &builder, Value trailingData, loc, builder, inTy, packedPtr, trailingData); if (isa(inTy)) { Value retVal = dynamo.first; - Value tmp = builder.create(loc, retVal.getType()); - builder.create(loc, retVal, tmp); + Value tmp = cudaq::cc::AllocaOp::create(builder, loc, retVal.getType()); + cudaq::cc::StoreOp::create(builder, loc, retVal, tmp); return {tmp, dynamo.second}; } if (isa(inTy)) { auto module = packedPtr->getParentOfType(); auto arrTy = cudaq::opt::factory::genHostStringType(module); Value retVal = dynamo.first; - Value tmp = builder.create(loc, arrTy); + Value tmp = cudaq::cc::AllocaOp::create(builder, loc, arrTy); auto ptrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - Value castTmp = builder.create(loc, ptrTy, tmp); - Value len = builder.create( - loc, builder.getI64Type(), dynamo.first); + Value castTmp = cudaq::cc::CastOp::create(builder, loc, ptrTy, tmp); + Value len = cudaq::cc::StdvecSizeOp::create( + builder, loc, builder.getI64Type(), dynamo.first); Value data = - builder.create(loc, ptrTy, dynamo.first); - builder.create(loc, TypeRange{}, - cudaq::runtime::bindingInitializeString, - ArrayRef{castTmp, data, len}); + cudaq::cc::StdvecDataOp::create(builder, loc, ptrTy, dynamo.first); + func::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::bindingInitializeString, + ArrayRef{castTmp, data, len}); return {tmp, dynamo.second}; } return dynamo; diff --git a/lib/Optimizer/CAPI/CMakeLists.txt b/lib/Optimizer/CAPI/CMakeLists.txt index b17dfaeae15..61496606da1 100644 --- a/lib/Optimizer/CAPI/CMakeLists.txt 
+++ b/lib/Optimizer/CAPI/CMakeLists.txt @@ -6,13 +6,14 @@ # the terms of the Apache License 2.0 which accompanies this distribution. # # ============================================================================ # -add_mlir_public_c_api_library(CUDAQuantumMLIRCAPI +add_mlir_public_c_api_library(CUDAQuantumMLIRCAPI Dialects.cpp DEPENDS QuakeDialectIncGen - LINK_LIBS PRIVATE - QuakeDialect + LINK_LIBS PRIVATE + QuakeDialect CCDialect + MLIRRegisterAllDialects ) diff --git a/lib/Optimizer/CAPI/Dialects.cpp b/lib/Optimizer/CAPI/Dialects.cpp index 59a1210a694..8d2c482d465 100644 --- a/lib/Optimizer/CAPI/Dialects.cpp +++ b/lib/Optimizer/CAPI/Dialects.cpp @@ -9,6 +9,16 @@ #include "cudaq/Optimizer/CAPI/Dialects.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" +#include "mlir/InitAllDialects.h" MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(Quake, quake, quake::QuakeDialect) MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(CC, cc, cudaq::cc::CCDialect) + +extern "C" void cudaqRegisterAllDialects(MlirContext context) { + mlir::DialectRegistry registry; + registry.insert(); + mlir::registerAllDialects(registry); + auto *mlirContext = unwrap(context); + mlirContext->appendDialectRegistry(registry); + mlirContext->loadAllAvailableDialects(); +} diff --git a/lib/Optimizer/CodeGen/CCToLLVM.cpp b/lib/Optimizer/CodeGen/CCToLLVM.cpp index ce0e4b50bb5..8291c1ff63d 100644 --- a/lib/Optimizer/CodeGen/CCToLLVM.cpp +++ b/lib/Optimizer/CodeGen/CCToLLVM.cpp @@ -52,21 +52,30 @@ class AllocaOpPattern : public ConvertOpToLLVMPattern { LogicalResult matchAndRewrite(cudaq::cc::AllocaOp alloc, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto operands = adaptor.getOperands(); - auto toTy = LLVM::LLVMPointerType::get([&]() -> Type { - if (auto arrTy = dyn_cast(alloc.getElementType()); - arrTy && arrTy.isUnknownSize()) - return getTypeConverter()->convertType(arrTy.getElementType()); - return 
getTypeConverter()->convertType(alloc.getElementType()); - }()); - if (operands.empty()) { - rewriter.replaceOpWithNewOp( - alloc, toTy, - ArrayRef{cudaq::opt::factory::genLlvmI32Constant( - alloc.getLoc(), rewriter, 1)}); - } else { - rewriter.replaceOpWithNewOp(alloc, toTy, operands); + Type type = getTypeConverter()->convertType(alloc.getElementType()); + Value size = adaptor.getSeqSize(); + if (!size) + size = + cudaq::opt::factory::genLlvmI32Constant(alloc.getLoc(), rewriter, 1); +#ifdef __APPLE__ + if (alloc.getElementType().isInteger(8) && adaptor.getSeqSize()) { + auto loc = alloc.getLoc(); + auto i64Ty = rewriter.getI64Type(); + Value sized = size; + if (sized.getType() != i64Ty) + sized = LLVM::ZExtOp::create(rewriter, loc, i64Ty, sized); + auto seven = LLVM::ConstantOp::create(rewriter, loc, i64Ty, + rewriter.getI64IntegerAttr(7)); + auto mask = LLVM::ConstantOp::create( + rewriter, loc, i64Ty, + rewriter.getI64IntegerAttr(static_cast(~7ULL))); + auto bumped = LLVM::AddOp::create(rewriter, loc, i64Ty, sized, seven); + size = LLVM::AndOp::create(rewriter, loc, i64Ty, bumped, mask); } +#endif + rewriter.replaceOpWithNewOp( + alloc, cudaq::opt::factory::getPointerType(rewriter.getContext()), type, + size); return success(); } }; @@ -86,30 +95,26 @@ class CallableClosureOpPattern resTy.push_back(getTypeConverter()->convertType(callable.getType(i))); auto *ctx = rewriter.getContext(); auto tupleTy = LLVM::LLVMStructType::getLiteral(ctx, resTy); - auto tuplePtrTy = cudaq::opt::factory::getPointerType(tupleTy); + auto tuplePtrTy = cudaq::opt::factory::getPointerType(ctx); auto structTy = dyn_cast(operands[0].getType()); if (!structTy) return failure(); auto one = DenseI64ArrayAttr::get(ctx, ArrayRef{1}); - auto extract = rewriter.create( - loc, structTy.getBody()[1], operands[0], one); - if (resTy.size() == 1 && resTy[0] != tupleTy) { - auto tupleVal = rewriter.create( - loc, cudaq::opt::factory::getPointerType(resTy[0]), extract); - 
rewriter.replaceOpWithNewOp(callable, tupleVal); - } else { - auto tuplePtr = - rewriter.create(loc, tuplePtrTy, extract); - auto tupleVal = rewriter.create(loc, tupleTy, tuplePtr); - SmallVector exposedVals; - for (std::int64_t i = 0, N = resTy.size(); i < N; ++i) { - auto offset = DenseI64ArrayAttr::get(ctx, ArrayRef{i}); - auto extract = rewriter.create( - loc, tupleTy.getBody()[i], tupleVal, offset); - exposedVals.push_back(extract); - } - rewriter.replaceOp(callable, exposedVals); + auto extract = LLVM::ExtractValueOp::create( + rewriter, loc, structTy.getBody()[1], operands[0], one); + auto tupleVal = LLVM::BitcastOp::create(rewriter, loc, tuplePtrTy, extract); + auto loadOp = LLVM::LoadOp::create(rewriter, loc, tupleTy, tupleVal); + // In LLVM 22, replaceOp strictly requires the same number of results. + // The LoadOp returns a single struct value; extract each field to match + // the multiple results of CallableClosureOp. + SmallVector results; + for (std::size_t i = 0, N = callable.getResults().size(); i < N; ++i) { + auto idx = DenseI64ArrayAttr::get( + ctx, ArrayRef{static_cast(i)}); + results.push_back(LLVM::ExtractValueOp::create(rewriter, loc, resTy[i], + loadOp.getResult(), idx)); } + rewriter.replaceOp(callable, results); return success(); } }; @@ -130,8 +135,8 @@ class CallableFuncOpPattern return failure(); auto *ctx = rewriter.getContext(); auto zero = DenseI64ArrayAttr::get(ctx, ArrayRef{0}); - auto extract = rewriter.create( - loc, structTy.getBody()[0], operands[0], zero); + auto extract = LLVM::ExtractValueOp::create( + rewriter, loc, structTy.getBody()[0], operands[0], zero); rewriter.replaceOpWithNewOp(callable, resTy, extract); return success(); } @@ -146,6 +151,7 @@ class CallCallableOpPattern matchAndRewrite(cudaq::cc::CallCallableOp call, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto loc = call.getLoc(); + // Get the mlir::FunctionType signature from the callable auto calleeFuncTy = 
cast(call.getCallee().getType()) .getSignature(); @@ -154,52 +160,82 @@ class CallCallableOpPattern auto structTy = dyn_cast(operands[0].getType()); if (!structTy) return failure(); + + // Extract raw function pointer (first element of callable struct) auto ptr0Ty = structTy.getBody()[0]; auto zero = DenseI64ArrayAttr::get(ctx, ArrayRef{0}); auto rawFuncPtr = - rewriter.create(loc, ptr0Ty, operands[0], zero); + LLVM::ExtractValueOp::create(rewriter, loc, ptr0Ty, operands[0], zero); + + // Extract raw tuple pointer (second element of callable struct) auto ptr1Ty = structTy.getBody()[1]; auto one = DenseI64ArrayAttr::get(ctx, ArrayRef{1}); auto rawTuplePtr = - rewriter.create(loc, ptr1Ty, operands[0], one); - Type funcPtrTy = getTypeConverter()->convertType(calleeFuncTy); - auto funcPtr = rewriter.create(loc, funcPtrTy, rawFuncPtr); + LLVM::ExtractValueOp::create(rewriter, loc, ptr1Ty, operands[0], one); + + // Build the LLVM function type by converting the signature's types + // individually (since convertType on FunctionType returns ptr with opaque + // pointers) + SmallVector llvmArgTys; + for (Type argTy : calleeFuncTy.getInputs()) + llvmArgTys.push_back(getTypeConverter()->convertType(argTy)); + + Type llvmRetTy; + if (calleeFuncTy.getNumResults() == 0) + llvmRetTy = LLVM::LLVMVoidType::get(ctx); + else if (calleeFuncTy.getNumResults() == 1) + llvmRetTy = getTypeConverter()->convertType(calleeFuncTy.getResult(0)); + else { + // Multiple results - pack into a struct + SmallVector llvmResultTys; + for (Type resTy : calleeFuncTy.getResults()) + llvmResultTys.push_back(getTypeConverter()->convertType(resTy)); + llvmRetTy = LLVM::LLVMStructType::getLiteral(ctx, llvmResultTys); + } + auto llvmFuncTy = LLVM::LLVMFunctionType::get(llvmRetTy, llvmArgTys); + + // Check if tuple pointer is null (determines direct vs closure call) auto i64Ty = rewriter.getI64Type(); auto zeroI64 = cudaq::opt::factory::genLlvmI64Constant(loc, rewriter, 0); auto rawTupleVal = - 
rewriter.create(loc, i64Ty, rawTuplePtr); - auto isNullptr = rewriter.create(loc, LLVM::ICmpPredicate::eq, - rawTupleVal, zeroI64); + LLVM::PtrToIntOp::create(rewriter, loc, i64Ty, rawTuplePtr); + auto isNullptr = LLVM::ICmpOp::create( + rewriter, loc, LLVM::ICmpPredicate::eq, rawTupleVal, zeroI64); + + // Create control flow blocks auto *initBlock = rewriter.getInsertionBlock(); auto initPos = rewriter.getInsertionPoint(); auto *endBlock = rewriter.splitBlock(initBlock, initPos); auto *thenBlock = rewriter.createBlock(endBlock); auto *elseBlock = rewriter.createBlock(endBlock); + SmallVector resultTy; - auto llvmFuncTy = cast( - cast(funcPtrTy).getElementType()); if (!isa(llvmFuncTy.getReturnType())) { resultTy.push_back(llvmFuncTy.getReturnType()); endBlock->addArgument(resultTy[0], loc); } + rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, isNullptr, thenBlock, elseBlock); + LLVM::CondBrOp::create(rewriter, loc, isNullptr, thenBlock, elseBlock); + + // Then block: tuple is null, call function directly with remaining operands rewriter.setInsertionPointToEnd(thenBlock); - SmallVector arguments1 = {funcPtr}; - arguments1.append(operands.begin() + 1, operands.end()); - auto call1 = rewriter.create(loc, resultTy, arguments1); - rewriter.create(loc, call1.getResults(), endBlock); + SmallVector calleeOps1 = {rawFuncPtr}; + calleeOps1.append(operands.begin() + 1, operands.end()); + auto call1 = LLVM::CallOp::create(rewriter, loc, llvmFuncTy, calleeOps1); + LLVM::BrOp::create(rewriter, loc, call1.getResults(), endBlock); + + // Else block: tuple is not null, call with callable struct as first arg rewriter.setInsertionPointToEnd(elseBlock); - SmallVector argTys(operands.getTypes().begin(), - operands.getTypes().end()); - auto adjustedFuncTy = - LLVM::LLVMFunctionType::get(llvmFuncTy.getReturnType(), argTys); - auto adjustedFuncPtr = rewriter.create( - loc, cudaq::opt::factory::getPointerType(adjustedFuncTy), funcPtr); - SmallVector arguments2 = 
{adjustedFuncPtr}; - arguments2.append(operands.begin(), operands.end()); - auto call2 = rewriter.create(loc, resultTy, arguments2); - rewriter.create(loc, call2.getResults(), endBlock); + SmallVector calleeOps2 = {rawFuncPtr}; + calleeOps2.append(operands.begin(), operands.end()); + SmallVector closureArgTys; + closureArgTys.push_back(operands[0].getType()); + closureArgTys.append(llvmArgTys.begin(), llvmArgTys.end()); + auto closureFuncTy = LLVM::LLVMFunctionType::get(llvmRetTy, closureArgTys); + auto call2 = LLVM::CallOp::create(rewriter, loc, closureFuncTy, calleeOps2); + LLVM::BrOp::create(rewriter, loc, call2.getResults(), endBlock); + rewriter.replaceOp(call, endBlock->getArguments()); return success(); } @@ -214,13 +250,29 @@ class CallIndirectCallableOpPattern matchAndRewrite(cudaq::cc::CallIndirectCallableOp call, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto loc = call.getLoc(); + auto *ctx = rewriter.getContext(); auto parentModule = call->getParentOfType(); - auto funcPtrTy = getTypeConverter()->convertType( - cast(call.getCallee().getType()) - .getSignature()); - auto ptrTy = LLVM::LLVMPointerType::get(rewriter.getI8Type()); - auto funcTy = cast( - cast(funcPtrTy).getElementType()); + auto indirectTy = + cast(call.getCallee().getType()); + mlir::FunctionType calleeFuncTy = indirectTy.getSignature(); + auto funcPtrTy = getTypeConverter()->convertType(calleeFuncTy); + auto ptrTy = cudaq::opt::factory::getPointerType(ctx); + SmallVector llvmArgTys; + for (Type argTy : calleeFuncTy.getInputs()) + llvmArgTys.push_back(getTypeConverter()->convertType(argTy)); + Type llvmRetTy; + if (calleeFuncTy.getNumResults() == 0) + llvmRetTy = LLVM::LLVMVoidType::get(ctx); + else if (calleeFuncTy.getNumResults() == 1) + llvmRetTy = getTypeConverter()->convertType(calleeFuncTy.getResult(0)); + else { + SmallVector llvmResultTys; + for (Type resTy : calleeFuncTy.getResults()) + 
llvmResultTys.push_back(getTypeConverter()->convertType(resTy)); + llvmRetTy = LLVM::LLVMStructType::getLiteral(ctx, llvmResultTys); + } + LLVM::LLVMFunctionType funcTy = + LLVM::LLVMFunctionType::get(llvmRetTy, llvmArgTys); auto i64Ty = rewriter.getI64Type(); // intptr_t FlatSymbolRefAttr funSymbol = cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::runtime::getLinkableKernelDeviceSide, ptrTy, {i64Ty}, @@ -232,19 +284,17 @@ class CallIndirectCallableOpPattern // device-side functions are located in the same address space as well. None // of these functions should be expected to reside on remote hardware. // Therefore, this will likely only be useful in a simulation target. - auto lookee = rewriter.create( - loc, ptrTy, funSymbol, ValueRange{adaptor.getCallee()}); + auto lookee = LLVM::CallOp::create(rewriter, loc, ptrTy, funSymbol, + ValueRange{adaptor.getCallee()}); auto lookup = - rewriter.create(loc, funcPtrTy, lookee.getResult()); + LLVM::BitcastOp::create(rewriter, loc, funcPtrTy, lookee.getResult()); - // Call the function that was just found in the map. + // Use create() so operandSegmentSizes is set (LLVM 22 + // AttrSizedOperandSegments). 
SmallVector args = {lookup.getResult()}; args.append(adaptor.getArgs().begin(), adaptor.getArgs().end()); - if (isa(funcTy.getReturnType())) - rewriter.replaceOpWithNewOp(call, std::nullopt, args); - else - rewriter.replaceOpWithNewOp(call, funcTy.getReturnType(), - args); + auto newCall = LLVM::CallOp::create(rewriter, loc, funcTy, args); + rewriter.replaceOp(call, newCall.getResults()); return success(); } }; @@ -329,20 +379,27 @@ class ComputePtrOpPattern LogicalResult matchAndRewrite(cudaq::cc::ComputePtrOp cpOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto operands = adaptor.getOperands(); - auto toTy = getTypeConverter()->convertType(cpOp.getType()); + // Get the CC element type before conversion + auto ccPtrTy = cast(cpOp.getBase().getType()); + Type ccEleTy = ccPtrTy.getElementType(); + // The first operand is the base pointer. - Value base = operands[0]; if (cpOp.llvmNormalForm()) { // In this case, the `cc.compute_ptr` has already been converted such that // it corresponds 1:1 with the C-like semantics of LLVM's getelementptr // operation. Specifically, a pointer to a scalar type is overloaded to // possibly be the same as a pointer to an array with unknown bound. // All operands except the first are indices. + // Extract inner element type from CC array type before conversion + ccEleTy = cast(ccEleTy).getElementType(); auto newOpnds = interleaveConstantsAndOperands( - operands.drop_front(), cpOp.getRawConstantIndices()); + adaptor.getDynamicIndices(), cpOp.getRawConstantIndices()); + // Convert to LLVM type after extracting the element type + Type eleTy = getTypeConverter()->convertType(ccEleTy); // Rewrite the ComputePtrOp as a LLVM::GEPOp. 
- rewriter.replaceOpWithNewOp(cpOp, toTy, base, newOpnds); + rewriter.replaceOpWithNewOp( + cpOp, cudaq::opt::factory::getPointerType(rewriter.getContext()), + eleTy, adaptor.getBase(), newOpnds); } else { // If the `cc.compute_ptr` operation has a base argument that is not in // LLVM normal form, we implicitly assume that pointer's element type @@ -354,9 +411,13 @@ class ComputePtrOpPattern SmallVector constIndices = {0}; constIndices.append(cpOp.getRawConstantIndices().begin(), cpOp.getRawConstantIndices().end()); - auto newOpnds = - interleaveConstantsAndOperands(operands.drop_front(), constIndices); - rewriter.replaceOpWithNewOp(cpOp, toTy, base, newOpnds); + auto newOpnds = interleaveConstantsAndOperands( + adaptor.getDynamicIndices(), constIndices); + // Convert to LLVM type + Type eleTy = getTypeConverter()->convertType(ccEleTy); + rewriter.replaceOpWithNewOp( + cpOp, cudaq::opt::factory::getPointerType(rewriter.getContext()), + eleTy, adaptor.getBase(), newOpnds); } return success(); } @@ -430,9 +491,9 @@ class GlobalOpPattern : public ConvertOpToLLVMPattern { auto name = global.getSymName(); bool isReadOnly = global.getConstant(); Attribute initializer = global.getValue().value_or(Attribute{}); - rewriter.create(loc, type, isReadOnly, - LLVM::Linkage::Private, name, - initializer, /*alignment=*/0); + mlir::LLVM::GlobalOp::create(rewriter, loc, type, isReadOnly, + LLVM::Linkage::Private, name, initializer, + /*alignment=*/0); rewriter.eraseOp(global); return success(); } @@ -471,41 +532,33 @@ class InstantiateCallableOpPattern Value tmp; auto tupleArgTy = cudaq::opt::lambdaAsPairOfPointers(ctx); if (callable.getNoCapture()) { - auto zero = cudaq::opt::factory::genLlvmI64Constant(loc, rewriter, 0); - tmp = - rewriter.create(loc, tupleArgTy.getBody()[1], zero); + Value zero = cudaq::opt::factory::genLlvmI64Constant(loc, rewriter, 0); + tmp = LLVM::IntToPtrOp::create(rewriter, loc, tupleArgTy.getBody()[1], + zero); } else { - Value tupleVal = 
rewriter.create(loc, tupleTy); + Value tupleVal = LLVM::UndefOp::create(rewriter, loc, tupleTy); std::int64_t offsetVal = 0; for (auto op : operands) { auto offset = DenseI64ArrayAttr::get(ctx, ArrayRef{offsetVal}); - tupleVal = rewriter.create(loc, tupleTy, tupleVal, - op, offset); + tupleVal = LLVM::InsertValueOp::create(rewriter, loc, tupleTy, tupleVal, + op, offset); offsetVal++; } - auto tuplePtrTy = cudaq::opt::factory::getPointerType(tupleTy); - tmp = cudaq::opt::factory::createLLVMTemporary(loc, rewriter, tuplePtrTy); - rewriter.create(loc, tupleVal, tmp); + tmp = cudaq::opt::factory::createLLVMTemporary(loc, rewriter, tupleTy); + LLVM::StoreOp::create(rewriter, loc, tupleVal, tmp); } - Value tupleArg = rewriter.create(loc, tupleArgTy); - auto module = callable->getParentOfType(); - auto *calledFuncOp = module.lookupSymbol(callable.getCallee()); - auto sigTy = [&]() -> Type { - if (auto calledFunc = dyn_cast(calledFuncOp)) - return getTypeConverter()->convertType(calledFunc.getFunctionType()); - return cudaq::opt::factory::getPointerType( - cast(calledFuncOp).getFunctionType()); - }(); - auto tramp = rewriter.create( - loc, sigTy, callable.getCallee().cast()); + Value tupleArg = LLVM::UndefOp::create(rewriter, loc, tupleArgTy); + auto sigTy = cudaq::opt::factory::getPointerType(ctx); + auto tramp = LLVM::AddressOfOp::create( + rewriter, loc, sigTy, cast(callable.getCallee())); auto trampoline = - rewriter.create(loc, tupleArgTy.getBody()[0], tramp); + LLVM::BitcastOp::create(rewriter, loc, tupleArgTy.getBody()[0], tramp); auto zeroA = DenseI64ArrayAttr::get(ctx, ArrayRef{0}); - tupleArg = rewriter.create(loc, tupleArgTy, tupleArg, - trampoline, zeroA); + tupleArg = LLVM::InsertValueOp::create(rewriter, loc, tupleArgTy, tupleArg, + trampoline, zeroA); auto castTmp = - rewriter.create(loc, tupleArgTy.getBody()[1], tmp); + LLVM::BitcastOp::create(rewriter, loc, tupleArgTy.getBody()[1], tmp); rewriter.replaceOpWithNewOp( callable, tupleArgTy, tupleArg, castTmp, 
DenseI64ArrayAttr::get(ctx, ArrayRef{1})); @@ -532,8 +585,6 @@ class SizeOfOpPattern : public ConvertOpToLLVMPattern { public: using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - // Use the GEP approach for now. LLVM is planning to remove support for this - // at some point. See: https://github.com/llvm/llvm-project/issues/71507 LogicalResult matchAndRewrite(cudaq::cc::SizeOfOp sizeOfOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { @@ -541,19 +592,17 @@ class SizeOfOpPattern : public ConvertOpToLLVMPattern { auto resultTy = sizeOfOp.getType(); if (quake::isQuakeType(inputTy) || cudaq::cc::isDynamicallySizedType(inputTy)) { + // Types that cannot be reified produce the poison op. rewriter.replaceOpWithNewOp(sizeOfOp, resultTy); return success(); } auto loc = sizeOfOp.getLoc(); - // TODO: replace this with some target-specific memory layout computation - // when we upgrade to a newer MLIR. - auto zero = rewriter.create(loc, 0, 64); - auto ptrTy = - cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(inputTy)); - auto nullCast = rewriter.create(loc, ptrTy, zero); - Value nextPtr = rewriter.create( - loc, ptrTy, nullCast, ArrayRef{1}); - rewriter.replaceOpWithNewOp(sizeOfOp, resultTy, nextPtr); + // We rely on MLIR here, they are using the GEP approach for now. LLVM is + // planning to remove support for this at some point. + // See: https://github.com/llvm/llvm-project/issues/71507 and + // https://github.com/llvm/llvm-project/issues/96047 + auto sizeOp = getSizeInBytes(loc, inputTy, rewriter); + rewriter.replaceOp(sizeOfOp, sizeOp); return success(); } }; @@ -575,11 +624,11 @@ class OffsetOfOpPattern : public ConvertOpToLLVMPattern { auto loc = offsetOp.getLoc(); // TODO: replace this with some target-specific memory layout computation // when we upgrade to a newer MLIR. 
- auto zero = rewriter.create(loc, 0, 64); + auto zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); auto ptrTy = cudaq::cc::PointerType::get(inputTy); - auto nul = rewriter.create(loc, ptrTy, zero); + auto nul = cudaq::cc::CastOp::create(rewriter, loc, ptrTy, zero); Value nextPtr = - rewriter.create(loc, ptrTy, nul, args); + cudaq::cc::ComputePtrOp::create(rewriter, loc, ptrTy, nul, args); rewriter.replaceOpWithNewOp(offsetOp, resultTy, nextPtr); return success(); } @@ -600,8 +649,8 @@ class StdvecDataOpPattern auto structTy = dyn_cast(operands[0].getType()); if (!structTy) return data.emitError("stdvec_data must have a struct as argument."); - auto extract = rewriter.create( - data.getLoc(), structTy.getBody()[0], operands[0], zero); + auto extract = LLVM::ExtractValueOp::create( + rewriter, data.getLoc(), structTy.getBody()[0], operands[0], zero); rewriter.replaceOpWithNewOp(data, resTy, extract); return success(); } @@ -620,26 +669,26 @@ class StdvecInitOpPattern auto ctx = init.getContext(); auto zero = DenseI64ArrayAttr::get(ctx, ArrayRef{0}); auto loc = init.getLoc(); - Value val = rewriter.create(loc, resTy); + Value val = LLVM::UndefOp::create(rewriter, loc, resTy); auto structTy = dyn_cast(resTy); if (!structTy) return init.emitError("stdvec_init must have a struct as argument."); - auto cast = rewriter.create(loc, structTy.getBody()[0], - operands[0]); - val = rewriter.create(loc, val, cast, zero); + auto yolo = LLVM::BitcastOp::create(rewriter, loc, structTy.getBody()[0], + operands[0]); + val = LLVM::InsertValueOp::create(rewriter, loc, val, yolo, zero); auto one = DenseI64ArrayAttr::get(ctx, ArrayRef{1}); if (operands.size() == 2) { rewriter.replaceOpWithNewOp(init, val, operands[1], one); } else { std::int64_t arrSize = - llvm::cast( - llvm::cast(operands[0].getType()) + cast( + cast(init.getBuffer().getType()) .getElementType()) - .getNumElements(); + .getSize(); auto i64Ty = rewriter.getI64Type(); - Value len = rewriter.create( - loc, 
i64Ty, IntegerAttr::get(i64Ty, arrSize)); + Value len = LLVM::ConstantOp::create(rewriter, loc, i64Ty, + IntegerAttr::get(i64Ty, arrSize)); rewriter.replaceOpWithNewOp(init, val, len, one); } return success(); @@ -693,7 +742,7 @@ class CreateStringLiteralOpPattern // Get the string address rewriter.replaceOpWithNewOp( stringLiteralOp, - cudaq::opt::factory::getPointerType(slGlobal.getType()), + cudaq::opt::factory::getPointerType(rewriter.getContext()), slGlobal.getSymName()); return success(); @@ -751,8 +800,19 @@ class VarargCallPattern SmallVector types; for (auto ty : vcall.getResultTypes()) types.push_back(getTypeConverter()->convertType(ty)); - rewriter.replaceOpWithNewOp(vcall, types, vcall.getCallee(), - adaptor.getArgs()); + + // For vararg calls, we need to set the var_callee_type attribute. Look up + // the callee function to get its type. + auto calleeName = vcall.getCalleeAttr(); + TypeAttr varCalleeType; + if (auto func = SymbolTable::lookupNearestSymbolFrom( + vcall, calleeName)) + varCalleeType = TypeAttr::get(func.getFunctionType()); + + auto callOp = rewriter.replaceOpWithNewOp( + vcall, types, calleeName, adaptor.getArgs()); + if (varCalleeType) + callOp.setVarCalleeTypeAttr(varCalleeType); return success(); } }; diff --git a/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp b/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp index 9dc2b679ea3..d484ab866c9 100644 --- a/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp +++ b/lib/Optimizer/CodeGen/ConvertCCToLLVM.cpp @@ -6,13 +6,11 @@ * the terms of the Apache License 2.0 which accompanies this distribution. 
* ******************************************************************************/ +#include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/CodeGen/CCToLLVM.h" -#include "cudaq/Optimizer/CodeGen/CodeGenDialect.h" #include "cudaq/Optimizer/CodeGen/Passes.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h" diff --git a/lib/Optimizer/CodeGen/ConvertToExecMgr.cpp b/lib/Optimizer/CodeGen/ConvertToExecMgr.cpp index 9cb7869cd66..7915d25286f 100644 --- a/lib/Optimizer/CodeGen/ConvertToExecMgr.cpp +++ b/lib/Optimizer/CodeGen/ConvertToExecMgr.cpp @@ -11,23 +11,19 @@ #include "cudaq/Optimizer/CodeGen/CudaqFunctionNames.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/QuakeToExecMgr.h" -#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" -#define DEBUG_TYPE "convert-to-cc" - namespace cudaq::opt { #define GEN_PASS_DEF_QUAKETOCCPREP #define GEN_PASS_DEF_QUAKETOCC #include "cudaq/Optimizer/CodeGen/Passes.h.inc" } // namespace cudaq::opt +#define DEBUG_TYPE "convert-to-cc" + using namespace mlir; namespace { @@ -101,7 +97,7 @@ struct QuakeToCCPrepPass return; } - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if (failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "Module after prep:\n"; op->dump()); } diff --git a/lib/Optimizer/CodeGen/ConvertToQIR.cpp 
b/lib/Optimizer/CodeGen/ConvertToQIR.cpp index 686eb82d806..e8442ee0eaa 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIR.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIR.cpp @@ -16,8 +16,6 @@ #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h" #include "cudaq/Optimizer/CodeGen/QuakeToLLVM.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/FormatVariadic.h" @@ -31,11 +29,16 @@ #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" -#include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +namespace cudaq::opt { +#define GEN_PASS_DEF_CONVERTTOQIR +#define GEN_PASS_DEF_LOWERTOCG +#include "cudaq/Optimizer/CodeGen/Passes.h.inc" +} // namespace cudaq::opt + #define DEBUG_TYPE "convert-to-qir" /** @@ -45,12 +48,6 @@ version 0.1. 
*/ -namespace cudaq::opt { -#define GEN_PASS_DEF_CONVERTTOQIR -#define GEN_PASS_DEF_LOWERTOCG -#include "cudaq/Optimizer/CodeGen/Passes.h.inc" -} // namespace cudaq::opt - using namespace mlir; #include "PeepholePatterns.inc" @@ -61,7 +58,7 @@ static LogicalResult fuseSubgraphPatterns(MLIRContext *ctx, ModuleOp module) { RewritePatternSet patterns(ctx); cudaq::codegen::populateQuakeToCodegenPatterns(patterns); LLVM_DEBUG(llvm::dbgs() << "Before codegen dialect:\n"; module.dump()); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) return failure(); LLVM_DEBUG(llvm::dbgs() << "After codegen dialect:\n"; module.dump()); return success(); @@ -120,18 +117,19 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { auto v = [&]() -> Value { auto val = constantValues[idx]; if (auto fTy = dyn_cast(eleTy)) - return builder.create( - loc, cast(val).getValue(), fTy); + return arith::ConstantFloatOp::create( + builder, loc, fTy, cast(val).getValue()); if (auto iTy = dyn_cast(eleTy)) - return builder.create( - loc, cast(val).getInt(), iTy); + return arith::ConstantIntOp::create( + builder, loc, iTy, cast(val).getInt()); auto cTy = cast(eleTy); - return builder.create(loc, cTy, - cast(val)); + return complex::ConstantOp::create(builder, loc, cTy, + cast(val)); }(); - Value arrWithOffset = builder.create( - loc, ptrTy, buffer, ArrayRef{idx}); - builder.create(loc, v, arrWithOffset); + Value arrWithOffset = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrTy, buffer, + ArrayRef{idx}); + cudaq::cc::StoreOp::create(builder, loc, v, arrWithOffset); } cleanUps.push_back(user); } @@ -195,10 +193,12 @@ class ConvertToQIR : public cudaq::opt::impl::ConvertToQIRBase { } // namespace void cudaq::opt::initializeTypeConversions(LLVMTypeConverter &typeConverter) { - typeConverter.addConversion( - [](quake::VeqType type) { return getArrayType(type.getContext()); }); - 
typeConverter.addConversion( - [](quake::RefType type) { return getQubitType(type.getContext()); }); + typeConverter.addConversion([](quake::VeqType type) { + return cg::getLLVMArrayType(type.getContext()); + }); + typeConverter.addConversion([](quake::RefType type) { + return cg::getLLVMQubitType(type.getContext()); + }); typeConverter.addConversion([&](quake::StruqType type) { SmallVector mems; for (auto m : type.getMembers()) diff --git a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp index 310de98707b..b369d6d8af9 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp @@ -7,28 +7,20 @@ ******************************************************************************/ #include "CodeGenOps.h" +#include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" -#include "cudaq/Optimizer/CodeGen/CodeGenDialect.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h" #include "cudaq/Optimizer/CodeGen/QuakeToExecMgr.h" -#include "cudaq/Optimizer/Dialect/CC/CCDialect.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" // for GlobalizeArrayValues #include "nlohmann/json.hpp" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassOptions.h" #include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" #define DEBUG_TYPE "convert-to-qir-api" @@ -100,10 +92,11 @@ static Value 
createGlobalCString(Operation *op, Location loc, cudaq::IRBuilder irb(rewriter.getContext()); auto mod = op->getParentOfType(); auto nameObj = irb.genCStringLiteralAppendNul(loc, mod, regName); - Value nameVal = rewriter.create( - loc, cudaq::cc::PointerType::get(nameObj.getType()), nameObj.getName()); + Value nameVal = cudaq::cc::AddressOfOp::create( + rewriter, loc, cudaq::cc::PointerType::get(nameObj.getType()), + nameObj.getName()); auto cstrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); - return rewriter.create(loc, cstrTy, nameVal); + return cudaq::cc::CastOp::create(rewriter, loc, cstrTy, nameVal); } /// Use modifier class classes to specialize the QIR API to a particular flavor @@ -241,22 +234,22 @@ struct AllocaOpToCallsRewrite : public OpConversionPattern { Value sizeOperand; auto loc = alloc.getLoc(); if (adaptor.getOperands().empty()) { - auto type = alloc.getType().cast(); + auto type = cast(alloc.getType()); if (!type.hasSpecifiedSize()) return failure(); auto constantSize = type.getSize(); sizeOperand = - rewriter.create(loc, constantSize, 64); + arith::ConstantIntOp::create(rewriter, loc, constantSize, 64); } else { sizeOperand = adaptor.getOperands().front(); auto sizeOpTy = cast(sizeOperand.getType()); if (sizeOpTy.getWidth() < 64) - sizeOperand = rewriter.create( - loc, rewriter.getI64Type(), sizeOperand, + sizeOperand = cudaq::cc::CastOp::create( + rewriter, loc, rewriter.getI64Type(), sizeOperand, cudaq::cc::CastOpMode::Unsigned); else if (sizeOpTy.getWidth() > 64) - sizeOperand = rewriter.create( - loc, rewriter.getI64Type(), sizeOperand); + sizeOperand = cudaq::cc::CastOp::create( + rewriter, loc, rewriter.getI64Type(), sizeOperand); } // Replace the AllocaOp with the QIR call. @@ -300,8 +293,9 @@ struct NullCableOpToCallsRewrite // return type. 
auto loc = nullcable.getLoc(); quake::CableType type = nullcable.getType(); - auto width = type.getSize(); - Value sizeOperand = rewriter.create(loc, width, 64); + auto constantSize = type.getSize(); + Value sizeOperand = + arith::ConstantIntOp::create(rewriter, loc, constantSize, 64); // Replace the NullCableOp with the QIR call. rewriter.replaceOpWithNewOp( @@ -337,7 +331,7 @@ struct AllocaOpToIntRewrite : public OpConversionPattern { // the startingIndex as the qubit value. Voila! if (auto resultType = dyn_cast(ty)) { Value index = - rewriter.create(loc, startingOffset, 64); + arith::ConstantIntOp::create(rewriter, loc, startingOffset, 64); auto qubitTy = M::getQubitType(rewriter.getContext()); rewriter.replaceOpWithNewOp(alloc, qubitTy, index); return success(); @@ -358,8 +352,8 @@ struct AllocaOpToIntRewrite : public OpConversionPattern { SmallVector data; for (std::int64_t i = 0; i < veqSize; ++i) data.emplace_back(startingOffset + i); - auto arr = rewriter.create( - loc, arrTy, rewriter.getI64ArrayAttr(data)); + auto arr = cudaq::cc::ConstantArrayOp::create( + rewriter, loc, arrTy, rewriter.getI64ArrayAttr(data)); Type qirArrTy = M::getArrayType(rewriter.getContext()); rewriter.replaceOpWithNewOp( alloc, qirArrTy, arr); @@ -387,7 +381,7 @@ struct NullWireOpToIntRewrite : public OpConversionPattern { // In this case this is allocating a single qubit, so we can just substitute // the startingIndex as the qubit value. Voila! 
Value index = - rewriter.create(loc, startingOffset, 64); + arith::ConstantIntOp::create(rewriter, loc, startingOffset, 64); auto qubitTy = M::getQubitType(rewriter.getContext()); rewriter.replaceOpWithNewOp(nullwire, qubitTy, index); return success(); @@ -426,8 +420,8 @@ struct NullCableOpToIntRewrite SmallVector data; for (std::int64_t i = 0; i < cableSize; ++i) data.emplace_back(startingOffset + i); - auto arr = rewriter.create( - loc, arrTy, rewriter.getI64ArrayAttr(data)); + auto arr = cudaq::cc::ConstantArrayOp::create( + rewriter, loc, arrTy, rewriter.getI64ArrayAttr(data)); Type qirArrTy = M::getArrayType(rewriter.getContext()); rewriter.replaceOpWithNewOp( nullcable, qirArrTy, arr); @@ -435,6 +429,15 @@ struct NullCableOpToIntRewrite } }; +template +Type getInitialType(OP op, unsigned off) { + ArrayAttr initialArgs = + op->template getAttrOfType(InitialArgTypesAttrName); + if (!initialArgs) + return {}; + return cast(initialArgs[off]).getValue(); +} + template struct ApplyNoiseOpRewrite : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -444,6 +447,7 @@ struct ApplyNoiseOpRewrite : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { auto loc = noise.getLoc(); + const unsigned paramOffset = noise.getKey() ? 1 : 0; if (!noise.getNoiseFunc()) { // This is the key-based variant. Call the generalized version of the // apply_kraus_channel helper function. 
Let it do all the conversions into @@ -451,50 +455,51 @@ struct ApplyNoiseOpRewrite : public OpConversionPattern { SmallVector args; const bool pushASpan = adaptor.getParameters().size() == 1 && - isa(adaptor.getParameters()[0].getType()); + isa(getInitialType(noise, paramOffset)); const bool usingDouble = [&]() { if (adaptor.getParameters().empty()) return true; - auto param0 = adaptor.getParameters()[0]; + Type param0Ty = getInitialType(noise, paramOffset); if (pushASpan) - return cast(param0.getType()) - .getElementType() == rewriter.getF64Type(); - return cast(param0.getType()) - .getElementType() == rewriter.getF64Type(); + return cast(param0Ty).getElementType() == + rewriter.getF64Type(); + return cast(param0Ty).getElementType() == + rewriter.getF64Type(); }(); if (usingDouble) { auto code = static_cast( cudaq::opt::KrausChannelDataKind::DoubleKind); - args.push_back(rewriter.create(loc, code, 64)); + args.push_back(arith::ConstantIntOp::create(rewriter, loc, code, 64)); } else { auto code = static_cast( cudaq::opt::KrausChannelDataKind::FloatKind); - args.push_back(rewriter.create(loc, code, 64)); + args.push_back(arith::ConstantIntOp::create(rewriter, loc, code, 64)); } args.push_back(adaptor.getKey()); if (pushASpan) { - args.push_back(rewriter.create(loc, 1, 64)); - args.push_back(rewriter.create(loc, 0, 64)); + args.push_back(arith::ConstantIntOp::create(rewriter, loc, 1, 64)); + args.push_back(arith::ConstantIntOp::create(rewriter, loc, 0, 64)); } else { - args.push_back(rewriter.create(loc, 0, 64)); + args.push_back(arith::ConstantIntOp::create(rewriter, loc, 0, 64)); auto numParams = std::distance(adaptor.getParameters().begin(), adaptor.getParameters().end()); args.push_back( - rewriter.create(loc, numParams, 64)); + arith::ConstantIntOp::create(rewriter, loc, numParams, 64)); } auto numTargets = std::distance(adaptor.getQubits().begin(), adaptor.getQubits().end()); args.push_back( - rewriter.create(loc, numTargets, 64)); + 
arith::ConstantIntOp::create(rewriter, loc, numTargets, 64)); if (pushASpan) { Value stdvec = adaptor.getParameters()[0]; - auto stdvecTy = cast(stdvec.getType()); + auto stdvecTy = + cast(getInitialType(noise, paramOffset)); auto dataTy = cudaq::cc::PointerType::get( cudaq::cc::ArrayType::get(stdvecTy.getElementType())); args.push_back( - rewriter.create(loc, dataTy, stdvec)); - args.push_back(rewriter.create( - loc, rewriter.getI64Type(), stdvec)); + cudaq::cc::StdvecDataOp::create(rewriter, loc, dataTy, stdvec)); + args.push_back(cudaq::cc::StdvecSizeOp::create( + rewriter, loc, rewriter.getI64Type(), stdvec)); } else { args.append(adaptor.getParameters().begin(), adaptor.getParameters().end()); @@ -529,35 +534,40 @@ struct ApplyNoiseOpRewrite : public OpConversionPattern { // already the case, we just append the operands. SmallVector args; if (adaptor.getParameters().size() == 1 && - isa(adaptor.getParameters()[0].getType())) { + isa(getInitialType(noise, paramOffset))) { Value svp = adaptor.getParameters()[0]; // Convert the device-side span back to a host-side vector so that C++ // doesn't crash. 
- auto stdvecTy = cast(svp.getType()); + auto stdvecTy = + cast(getInitialType(noise, paramOffset)); auto *ctx = rewriter.getContext(); auto ptrTy = cudaq::cc::PointerType::get(stdvecTy.getElementType()); auto ptrArrTy = cudaq::cc::PointerType::get( cudaq::cc::ArrayType::get(stdvecTy.getElementType())); auto hostVecTy = cudaq::cc::ArrayType::get(ctx, ptrTy, 3); - auto hostVec = rewriter.create(loc, hostVecTy); + auto hostVec = cudaq::cc::AllocaOp::create(rewriter, loc, hostVecTy); Value startPtr = - rewriter.create(loc, ptrArrTy, svp); + cudaq::cc::StdvecDataOp::create(rewriter, loc, ptrArrTy, svp); auto i64Ty = rewriter.getI64Type(); - Value len = rewriter.create(loc, i64Ty, svp); - Value endPtr = rewriter.create( - loc, ptrTy, startPtr, ArrayRef{len}); + Value len = cudaq::cc::StdvecSizeOp::create(rewriter, loc, i64Ty, svp); + Value endPtr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrTy, startPtr, + ArrayRef{len}); Value castStartPtr = - rewriter.create(loc, ptrTy, startPtr); + cudaq::cc::CastOp::create(rewriter, loc, ptrTy, startPtr); auto ptrPtrTy = cudaq::cc::PointerType::get(ptrTy); - Value ptr0 = rewriter.create( - loc, ptrPtrTy, hostVec, ArrayRef{0}); - rewriter.create(loc, castStartPtr, ptr0); - Value ptr1 = rewriter.create( - loc, ptrPtrTy, hostVec, ArrayRef{1}); - rewriter.create(loc, endPtr, ptr1); - Value ptr2 = rewriter.create( - loc, ptrPtrTy, hostVec, ArrayRef{2}); - rewriter.create(loc, endPtr, ptr2); + Value ptr0 = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrPtrTy, hostVec, + ArrayRef{0}); + cudaq::cc::StoreOp::create(rewriter, loc, castStartPtr, ptr0); + Value ptr1 = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrPtrTy, hostVec, + ArrayRef{1}); + cudaq::cc::StoreOp::create(rewriter, loc, endPtr, ptr1); + Value ptr2 = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrPtrTy, hostVec, + ArrayRef{2}); + cudaq::cc::StoreOp::create(rewriter, loc, endPtr, ptr2); // N.B. 
This pointer must be treated as const by the C++ side and should // never have move semantics! @@ -581,11 +591,15 @@ struct ApplyNoiseOpRewrite : public OpConversionPattern { SmallVector qubits; SmallVector converted; Type qirArrTy = M::getArrayType(rewriter.getContext()); - for (auto [qb, oa] : llvm::zip(adaptor.getQubits(), noise.getQubits())) { - if ((oa && isa(oa.getType())) || - (!oa && (qb.getType() == qirArrTy))) { - auto svec = rewriter.create( - loc, qirArrTy, cudaq::opt::QISConvertArrayToStdvec, ValueRange{qb}); + SmallVector origQubitTys; + for (auto [i, _] : llvm::enumerate(noise.getQubits())) + origQubitTys.push_back(getInitialType( + noise, paramOffset + adaptor.getParameters().size() + i)); + for (auto [qb, oa] : llvm::zip(adaptor.getQubits(), origQubitTys)) { + if (isa(oa)) { + auto svec = func::CallOp::create(rewriter, loc, qirArrTy, + cudaq::opt::QISConvertArrayToStdvec, + ValueRange{qb}); qb = svec.getResult(0); converted.push_back(qb); } @@ -595,8 +609,8 @@ struct ApplyNoiseOpRewrite : public OpConversionPattern { rewriter.replaceOpWithNewOp(noise, TypeRange{}, *noise.getNoiseFunc(), args); for (auto v : converted) - rewriter.create( - loc, TypeRange{}, cudaq::opt::QISFreeConvertedStdvec, ValueRange{v}); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QISFreeConvertedStdvec, ValueRange{v}); return success(); } }; @@ -618,39 +632,42 @@ struct MaterializeConstantArrayOpRewrite } }; +/// This helper base class provides shared functionality to convert single +/// qubits (`!quake.ref`) to vectors of qubits (`!quake.veq`) to satisfy the QIR +/// API. 
template struct QubitHelperConversionPattern : public OpConversionPattern { using Base = OpConversionPattern; using Base::Base; Value wrapQubitAsArray(Location loc, ConversionPatternRewriter &rewriter, - Value val) const { - Type qubitTy = M::getQubitType(rewriter.getContext()); - if (val.getType() != qubitTy) + Value val, Type origTy) const { + if (isa(origTy)) return val; // Create a QIR array container of 1 element. auto ptrTy = cudaq::cc::PointerType::get(rewriter.getNoneType()); - Value sizeofPtrVal = - rewriter.create(loc, rewriter.getI32Type(), ptrTy); - Value one = rewriter.create(loc, 1, 64); + Value sizeofPtrVal = cudaq::cc::SizeOfOp::create( + rewriter, loc, rewriter.getI32Type(), ptrTy); + Value one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); Type arrayTy = M::getArrayType(rewriter.getContext()); - auto newArr = rewriter.create( - loc, TypeRange{arrayTy}, cudaq::opt::QIRArrayCreateArray, - ArrayRef{sizeofPtrVal, one}); + auto newArr = func::CallOp::create(rewriter, loc, TypeRange{arrayTy}, + cudaq::opt::QIRArrayCreateArray, + ArrayRef{sizeofPtrVal, one}); Value result = newArr.getResult(0); // Get a pointer to element 0. - Value zero = rewriter.create(loc, 0, 64); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + Type qubitTy = M::getQubitType(rewriter.getContext()); auto ptrQubitTy = cudaq::cc::PointerType::get(qubitTy); - auto elePtr = rewriter.create( - loc, TypeRange{ptrQubitTy}, cudaq::opt::QIRArrayGetElementPtr1d, - ArrayRef{result, zero}); + auto elePtr = func::CallOp::create(rewriter, loc, TypeRange{ptrQubitTy}, + cudaq::opt::QIRArrayGetElementPtr1d, + ArrayRef{result, zero}); // Write the qubit into the array at position 0. 
- auto castVal = rewriter.create(loc, qubitTy, val); + auto castVal = cudaq::cc::CastOp::create(rewriter, loc, qubitTy, val); Value addr = elePtr.getResult(0); - rewriter.create(loc, castVal, addr); + cudaq::cc::StoreOp::create(rewriter, loc, castVal, addr); return result; } @@ -680,11 +697,17 @@ struct ConcatOpRewrite auto loc = concat.getLoc(); Type arrayTy = M::getArrayType(rewriter.getContext()); Value firstOperand = adaptor.getOperands().front(); - Value resultArray = Base::wrapQubitAsArray(loc, rewriter, firstOperand); - for (auto next : adaptor.getOperands().drop_front()) { - Value wrapNext = Base::wrapQubitAsArray(loc, rewriter, next); - auto appended = rewriter.create( - loc, arrayTy, cudaq::opt::QIRArrayConcatArray, + Type firstTy = getInitialType(concat, 0); + Value resultArray = + Base::wrapQubitAsArray(loc, rewriter, firstOperand, firstTy); + SmallVector origTys; + for (auto [i, _] : llvm::enumerate(adaptor.getOperands().drop_front())) + origTys.push_back(getInitialType(concat, i + 1)); + for (auto [next, origTy] : + llvm::zip(adaptor.getOperands().drop_front(), origTys)) { + Value wrapNext = Base::wrapQubitAsArray(loc, rewriter, next, origTy); + auto appended = func::CallOp::create( + rewriter, loc, arrayTy, cudaq::opt::QIRArrayConcatArray, ArrayRef{resultArray, wrapNext}); resultArray = appended.getResult(0); } @@ -740,6 +763,7 @@ struct DeallocLikeErase : public OpConversionPattern { using DeallocOpErase = DeallocLikeErase; using SinkOpErase = DeallocLikeErase; + struct DiscriminateOpRewrite : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -750,7 +774,7 @@ struct DiscriminateOpRewrite auto loc = disc.getLoc(); Value m = adaptor.getMeasurement(); auto i1PtrTy = cudaq::cc::PointerType::get(rewriter.getI1Type()); - auto cast = rewriter.create(loc, i1PtrTy, m); + auto cast = cudaq::cc::CastOp::create(rewriter, loc, i1PtrTy, m); rewriter.replaceOpWithNewOp(disc, cast); return success(); } @@ -776,7 +800,7 @@ struct 
DiscriminateOpToCallRewrite if (operands.size() == 1 && isa(operands.front().getType())) { auto resultTy = M::getResultType(rewriter.getContext()); operands.front() = - rewriter.create(loc, resultTy, operands.front()); + cudaq::cc::CastOp::create(rewriter, loc, resultTy, operands.front()); } if constexpr (M::discriminateToClassical) { if constexpr (M::qirVersion == QirVersion::version_1_0) { @@ -791,9 +815,10 @@ struct DiscriminateOpToCallRewrite } else { // NB: the double cast here is to avoid folding the pointer casts. auto i64Ty = rewriter.getI64Type(); - auto unu = rewriter.create(loc, i64Ty, operands); + auto unu = cudaq::cc::CastOp::create(rewriter, loc, i64Ty, + adaptor.getOperands()); auto ptrI1Ty = cudaq::cc::PointerType::get(rewriter.getI1Type()); - auto du = rewriter.create(loc, ptrI1Ty, unu); + auto du = cudaq::cc::CastOp::create(rewriter, loc, ptrI1Ty, unu); rewriter.replaceOpWithNewOp(disc, du); } return success(); @@ -820,16 +845,16 @@ struct ExtractRefOpRewrite : public OpConversionPattern { Value index; if (!adaptor.getIndex()) { - index = rewriter.create( - loc, extract.getConstantIndex(), 64); + index = arith::ConstantIntOp::create(rewriter, loc, + extract.getConstantIndex(), 64); } else { index = adaptor.getIndex(); if (index.getType().isIntOrFloat()) { if (cast(index.getType()).getWidth() < 64) - index = rewriter.create( - loc, i64Ty, index, cudaq::cc::CastOpMode::Unsigned); + index = cudaq::cc::CastOp::create(rewriter, loc, i64Ty, index, + cudaq::cc::CastOpMode::Unsigned); else if (cast(index.getType()).getWidth() > 64) - index = rewriter.create(loc, i64Ty, index); + index = cudaq::cc::CastOp::create(rewriter, loc, i64Ty, index); } } auto qubitTy = M::getQubitType(rewriter.getContext()); @@ -837,15 +862,15 @@ struct ExtractRefOpRewrite : public OpConversionPattern { if (auto mca = veq.getDefiningOp()) { // This is the profile QIR case. 
- auto ext = rewriter.create( - loc, i64Ty, mca.getConstArray(), index); + auto ext = cudaq::cc::ExtractValueOp::create(rewriter, loc, i64Ty, + mca.getConstArray(), index); rewriter.replaceOpWithNewOp(extract, qubitTy, ext); return success(); } // Otherwise, this must be full QIR. - auto call = rewriter.create( - loc, cudaq::cc::PointerType::get(qubitTy), + auto call = func::CallOp::create( + rewriter, loc, cudaq::cc::PointerType::get(qubitTy), cudaq::opt::QIRArrayGetElementPtr1d, ArrayRef{veq, index}); rewriter.replaceOpWithNewOp(extract, call.getResult(0)); return success(); @@ -888,12 +913,12 @@ struct MakeStruqOpRewrite : public OpConversionPattern { auto loc = mkstruq.getLoc(); auto *ctx = rewriter.getContext(); auto toTy = getTypeConverter()->convertType(mkstruq.getType()); - Value result = rewriter.create(loc, toTy); + Value result = cudaq::cc::UndefOp::create(rewriter, loc, toTy); std::int64_t count = 0; for (auto op : adaptor.getOperands()) { auto off = DenseI64ArrayAttr::get(ctx, ArrayRef{count}); - result = - rewriter.create(loc, toTy, result, op, off); + result = cudaq::cc::InsertValueOp::create(rewriter, loc, toTy, result, op, + off); count++; } rewriter.replaceOp(mkstruq, result); @@ -967,20 +992,20 @@ struct QmemRAIIOpRewrite : public OpConversionPattern { auto type = dyn_cast(allocTy); auto constantSize = type ? 
type.getSize() : 1; sizeOperand = - rewriter.create(loc, constantSize, 64); + arith::ConstantIntOp::create(rewriter, loc, constantSize, 64); } else { sizeOperand = adaptor.getAllocSize(); auto sizeTy = cast(sizeOperand.getType()); if (sizeTy.getWidth() < 64) - sizeOperand = rewriter.create( - loc, i64Ty, sizeOperand, cudaq::cc::CastOpMode::Unsigned); + sizeOperand = cudaq::cc::CastOp::create( + rewriter, loc, i64Ty, sizeOperand, cudaq::cc::CastOpMode::Unsigned); else if (sizeTy.getWidth() > 64) sizeOperand = - rewriter.create(loc, i64Ty, sizeOperand); + cudaq::cc::CastOp::create(rewriter, loc, i64Ty, sizeOperand); } // Call the allocation function - Value casted = rewriter.create(loc, ptrTy, ccState); + Value casted = cudaq::cc::CastOp::create(rewriter, loc, ptrTy, ccState); rewriter.replaceOpWithNewOp( raii, arrayTy, functionName, ArrayRef{sizeOperand, casted}); return success(); @@ -1009,24 +1034,24 @@ struct SubveqOpRewrite : public OpConversionPattern { auto lowArg = [&]() -> Value { if (!adaptor.getLower()) - return rewriter.create(loc, adaptor.getRawLower(), - 64); + return arith::ConstantIntOp::create(rewriter, loc, + adaptor.getRawLower(), 64); return adaptor.getLower(); }(); auto highArg = [&]() -> Value { if (!adaptor.getUpper()) - return rewriter.create(loc, adaptor.getRawUpper(), - 64); + return arith::ConstantIntOp::create(rewriter, loc, + adaptor.getRawUpper(), 64); return adaptor.getUpper(); }(); auto i64Ty = rewriter.getI64Type(); auto extend = [&](Value &v) -> Value { if (auto intTy = dyn_cast(v.getType())) { if (intTy.getWidth() < 64) - return rewriter.create( - loc, i64Ty, v, cudaq::cc::CastOpMode::Unsigned); + return cudaq::cc::CastOp::create(rewriter, loc, i64Ty, v, + cudaq::cc::CastOpMode::Unsigned); if (intTy.getWidth() > 64) - return rewriter.create(loc, i64Ty, v); + return cudaq::cc::CastOp::create(rewriter, loc, i64Ty, v); } return v; }; @@ -1034,8 +1059,8 @@ struct SubveqOpRewrite : public OpConversionPattern { highArg = extend(highArg); 
Value inArr = adaptor.getVeq(); auto i32Ty = rewriter.getI32Type(); - Value one32 = rewriter.create(loc, 1, i32Ty); - Value one64 = rewriter.create(loc, 1, i64Ty); + Value one32 = arith::ConstantIntOp::create(rewriter, loc, i32Ty, 1); + Value one64 = arith::ConstantIntOp::create(rewriter, loc, i64Ty, 1); auto arrayTy = M::getArrayType(rewriter.getContext()); rewriter.replaceOpWithNewOp( subveq, arrayTy, cudaq::opt::QIRArraySlice, @@ -1096,12 +1121,20 @@ struct CustomUnitaryOpPattern return unitary.emitOpError("Custom operations must have targets."); // Concat all the targets into an array. - auto targetArray = - Base::wrapQubitAsArray(loc, rewriter, adaptor.getTargets().front()); - for (auto next : adaptor.getTargets().drop_front()) { - auto wrapNext = Base::wrapQubitAsArray(loc, rewriter, next); - auto result = rewriter.create( - loc, arrayTy, cudaq::opt::QIRArrayConcatArray, + Type firstTy = getInitialType(unitary, adaptor.getParameters().size() + + adaptor.getControls().size()); + auto targetArray = Base::wrapQubitAsArray( + loc, rewriter, adaptor.getTargets().front(), firstTy); + SmallVector origTys; + for (auto [i, _] : llvm::enumerate(adaptor.getTargets().drop_front())) + origTys.push_back( + getInitialType(unitary, adaptor.getParameters().size() + + adaptor.getControls().size() + i + 1)); + for (auto [next, origTy] : + llvm::zip(adaptor.getTargets().drop_front(), origTys)) { + auto wrapNext = Base::wrapQubitAsArray(loc, rewriter, next, origTy); + auto result = func::CallOp::create( + rewriter, loc, arrayTy, cudaq::opt::QIRArrayConcatArray, ArrayRef{targetArray, wrapNext}); targetArray = result.getResult(0); } @@ -1110,15 +1143,21 @@ struct CustomUnitaryOpPattern Value controlArray; if (adaptor.getControls().empty()) { // Use a nullptr for when 0 control qubits are present. 
- Value zero = rewriter.create(loc, 0, 64); - controlArray = rewriter.create(loc, arrayTy, zero); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + controlArray = cudaq::cc::CastOp::create(rewriter, loc, arrayTy, zero); } else { - controlArray = - Base::wrapQubitAsArray(loc, rewriter, adaptor.getControls().front()); - for (auto next : adaptor.getControls().drop_front()) { - auto wrapNext = Base::wrapQubitAsArray(loc, rewriter, next); - auto result = rewriter.create( - loc, arrayTy, cudaq::opt::QIRArrayConcatArray, + Type firstTy = getInitialType(unitary, adaptor.getParameters().size()); + controlArray = Base::wrapQubitAsArray( + loc, rewriter, adaptor.getControls().front(), firstTy); + SmallVector origTys; + for (auto [i, _] : llvm::enumerate(adaptor.getControls().drop_front())) + origTys.push_back( + getInitialType(unitary, adaptor.getParameters().size() + i + 1)); + for (auto [next, origTy] : + llvm::zip(adaptor.getControls().drop_front(), origTys)) { + auto wrapNext = Base::wrapQubitAsArray(loc, rewriter, next, origTy); + auto result = func::CallOp::create( + rewriter, loc, arrayTy, cudaq::opt::QIRArrayConcatArray, ArrayRef{controlArray, wrapNext}); controlArray = result.getResult(0); } @@ -1136,10 +1175,10 @@ struct CustomUnitaryOpPattern auto complex64PtrTy = cudaq::cc::PointerType::get(complex64Ty); auto globalObj = cast( unitary->getParentOfType().lookupSymbol(generatorName)); - auto addrOp = rewriter.create( - loc, globalObj.getType(), generatorName); + auto addrOp = cudaq::cc::AddressOfOp::create( + rewriter, loc, globalObj.getType(), generatorName); auto unitaryData = - rewriter.create(loc, complex64PtrTy, addrOp); + cudaq::cc::CastOp::create(rewriter, loc, complex64PtrTy, addrOp); StringRef functionName = unitary.isAdj() ? 
cudaq::opt::QIRCustomAdjOp : cudaq::opt::QIRCustomOp; @@ -1179,18 +1218,29 @@ struct ExpPauliOpPattern if (adaptor.getNegatedQubitControls()) return pauli->emitOpError("negated control qubits not allowed."); SmallVector controls; + const auto firstControlIndex = adaptor.getParameters().size(); if (adaptor.getControls().empty()) { // do nothing } else if (adaptor.getControls().size() > 1 || - !isa(adaptor.getControls().front().getType())) { + !isa(getInitialType(pauli, firstControlIndex))) { // Concat all controls into a single Array. Type arrayTy = M::getArrayType(rewriter.getContext()); + auto wrapIfQubit = [&](Value adaptorVal, Type origTy) { + if (isa(origTy)) + return adaptorVal; + return Base::wrapQubitAsArray(loc, rewriter, adaptorVal, origTy); + }; Value firstOperand = adaptor.getControls().front(); - Value resultArray = Base::wrapQubitAsArray(loc, rewriter, firstOperand); - for (auto next : adaptor.getControls().drop_front()) { - Value wrapNext = Base::wrapQubitAsArray(loc, rewriter, next); - auto appended = rewriter.create( - loc, arrayTy, cudaq::opt::QIRArrayConcatArray, + Type firstTy = getInitialType(pauli, firstControlIndex); + Value resultArray = wrapIfQubit(firstOperand, firstTy); + SmallVector origCtrlTys; + for (auto [i, _] : llvm::enumerate(adaptor.getControls().drop_front())) + origCtrlTys.push_back(getInitialType(pauli, firstControlIndex + i + 1)); + for (auto [next, origCtrlTy] : + llvm::zip(adaptor.getControls().drop_front(), origCtrlTys)) { + Value wrapNext = wrapIfQubit(next, origCtrlTy); + auto appended = func::CallOp::create( + rewriter, loc, arrayTy, cudaq::opt::QIRArrayConcatArray, ArrayRef{resultArray, wrapNext}); resultArray = appended.getResult(0); } @@ -1199,16 +1249,23 @@ struct ExpPauliOpPattern controls.push_back(adaptor.getControls().front()); } SmallVector targets; - if (adaptor.getTargets().size() > 1 || - !isa(adaptor.getTargets().front().getType())) { + const auto firstTargetIndex = + firstControlIndex + 
adaptor.getControls().size(); + Type firstTy = getInitialType(pauli, firstTargetIndex); + if (adaptor.getTargets().size() > 1 || !isa(firstTy)) { // Concat all targets into a single Array. Type arrayTy = M::getArrayType(rewriter.getContext()); Value firstOperand = adaptor.getTargets().front(); - Value resultArray = Base::wrapQubitAsArray(loc, rewriter, firstOperand); - for (auto next : adaptor.getTargets().drop_front()) { - Value wrapNext = Base::wrapQubitAsArray(loc, rewriter, next); - auto appended = rewriter.create( - loc, arrayTy, cudaq::opt::QIRArrayConcatArray, + Value resultArray = + Base::wrapQubitAsArray(loc, rewriter, firstOperand, firstTy); + SmallVector origTargTys; + for (auto [i, _] : llvm::enumerate(adaptor.getTargets().drop_front())) + origTargTys.push_back(getInitialType(pauli, firstTargetIndex + i + 1)); + for (auto [next, origTy] : + llvm::zip(adaptor.getTargets().drop_front(), origTargTys)) { + Value wrapNext = Base::wrapQubitAsArray(loc, rewriter, next, origTy); + auto appended = func::CallOp::create( + rewriter, loc, arrayTy, cudaq::opt::QIRArrayConcatArray, ArrayRef{resultArray, wrapNext}); resultArray = appended.getResult(0); } @@ -1221,7 +1278,7 @@ struct ExpPauliOpPattern auto qirFunctionName = M::quakeToFuncName(pauli); if (pauli.isAdj()) { for (auto v : adaptor.getParameters()) - operands.push_back(rewriter.create(loc, v)); + operands.push_back(arith::NegFOp::create(rewriter, loc, v)); } else { operands.append(adaptor.getParameters().begin(), adaptor.getParameters().end()); @@ -1242,7 +1299,7 @@ struct ExpPauliOpPattern auto arrSize = llvmArrTy.getNumElements(); auto toTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get( rewriter.getContext(), arrEleTy, arrSize)); - return rewriter.create(loc, toTy, glob); + return cudaq::cc::CastOp::create(rewriter, loc, toTy, glob); } return adaptor.getPauli(); }(); @@ -1253,64 +1310,74 @@ struct ExpPauliOpPattern // directly (a.k.a. a span)`{i8*,i64}` or a string literal `ptr>`. 
If it is a string literal, we need to map it to a pauli word. auto i8PtrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); - if (auto ptrTy = dyn_cast(pauliWord.getType())) { - // Make sure we have the right types to extract the length of the string - // literal - auto arrayTy = dyn_cast(ptrTy.getElementType()); - if (!arrayTy) - return pauli.emitOpError( - "exp_pauli string literal must have ptr type."); - if (!arrayTy.getSize()) - return pauli.emitOpError("string literal may not be empty."); - - // We must create the {i8*, i64} struct from the string literal - SmallVector structTys{i8PtrTy, rewriter.getI64Type()}; - auto structTy = - cudaq::cc::StructType::get(rewriter.getContext(), structTys); - - // Allocate the char span struct + Type wordTy; + if (!pauli.getPauliLiteral()) + wordTy = + getInitialType(pauli, firstTargetIndex + adaptor.getTargets().size()); + if (wordTy && isa(wordTy)) { + // The attribute tells us we have a pauli word expressed as `{i8*, i64}`. + // Allocate a stack slot for it and store what we have to that pointer, + // pass the pointer to NVQIR. + auto newPauliWord = pauliWord; + auto newPauliWordTy = newPauliWord.getType(); Value alloca = - cudaq::opt::factory::createTemporary(loc, rewriter, structTy); - - // Convert the number of elements to a constant op. 
- auto size = - rewriter.create(loc, arrayTy.getSize() - 1, 64); - - // Set the string literal data + cudaq::opt::factory::createTemporary(loc, rewriter, newPauliWordTy); + auto castedVar = cudaq::cc::CastOp::create( + rewriter, loc, cudaq::cc::PointerType::get(newPauliWordTy), alloca); + cudaq::cc::StoreOp::create(rewriter, loc, newPauliWord, castedVar); auto castedPauli = - rewriter.create(loc, i8PtrTy, pauliWord); - auto strPtr = rewriter.create( - loc, cudaq::cc::PointerType::get(i8PtrTy), alloca, - ArrayRef{0, 0}); - rewriter.create(loc, castedPauli, strPtr); - - // Set the integer length - auto intPtr = rewriter.create( - loc, cudaq::cc::PointerType::get(rewriter.getI64Type()), alloca, - ArrayRef{0, 1}); - rewriter.create(loc, size, intPtr); - - // Cast to raw opaque pointer - auto castedStore = - rewriter.create(loc, i8PtrTy, alloca); - operands.back() = castedStore; + cudaq::cc::CastOp::create(rewriter, loc, i8PtrTy, alloca); + operands.back() = castedPauli; rewriter.replaceOpWithNewOp(pauli, TypeRange{}, qirFunctionName, operands); return success(); } + // Make sure we have the right types to extract the length of the string + // literal. - // Here we know we have a pauli word expressed as `{i8*, i64}`. Allocate a - // stack slot for it and store what we have to that pointer, pass the - // pointer to NVQIR. 
- auto newPauliWord = pauliWord; - auto newPauliWordTy = newPauliWord.getType(); + auto ptrTy = [&]() -> cudaq::cc::PointerType { + if (wordTy) + return dyn_cast(wordTy); + return dyn_cast(pauliWord.getType()); + }(); + auto arrayTy = dyn_cast(ptrTy.getElementType()); + if (!arrayTy) + return pauli.emitOpError( + "exp_pauli string literal must have ptr type."); + if (!arrayTy.getSize()) + return pauli.emitOpError("string literal may not be empty."); + + // We must create the {i8*, i64} struct from the string literal + SmallVector structTys{i8PtrTy, rewriter.getI64Type()}; + auto structTy = + cudaq::cc::StructType::get(rewriter.getContext(), structTys); + + // Allocate the char span struct Value alloca = - cudaq::opt::factory::createTemporary(loc, rewriter, newPauliWordTy); - auto castedVar = rewriter.create( - loc, cudaq::cc::PointerType::get(newPauliWordTy), alloca); - rewriter.create(loc, newPauliWord, castedVar); - auto castedPauli = rewriter.create(loc, i8PtrTy, alloca); - operands.back() = castedPauli; + cudaq::opt::factory::createTemporary(loc, rewriter, structTy); + + // Convert the number of elements to a constant op. 
+ auto size = + arith::ConstantIntOp::create(rewriter, loc, arrayTy.getSize() - 1, 64); + + // Set the string literal data + auto castedPauli = + cudaq::cc::CastOp::create(rewriter, loc, i8PtrTy, pauliWord); + auto strPtr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, cudaq::cc::PointerType::get(i8PtrTy), alloca, + ArrayRef{0, 0}); + cudaq::cc::StoreOp::create(rewriter, loc, castedPauli, strPtr); + + // Set the integer length + auto intPtr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, cudaq::cc::PointerType::get(rewriter.getI64Type()), + alloca, ArrayRef{0, 1}); + cudaq::cc::StoreOp::create(rewriter, loc, size, intPtr); + + // Cast to raw opaque pointer + auto castedStore = + cudaq::cc::CastOp::create(rewriter, loc, i8PtrTy, alloca); + operands.back() = castedStore; rewriter.replaceOpWithNewOp(pauli, TypeRange{}, qirFunctionName, operands); return success(); @@ -1356,13 +1423,13 @@ struct MeasurementOpPattern : public OpConversionPattern { } auto resultTy = M::getResultType(rewriter.getContext()); auto call = - rewriter.create(loc, resultTy, functionName, args); + func::CallOp::create(rewriter, loc, resultTy, functionName, args); auto assundry = filterArgs(mz, adaptor.getTargets()); SmallVector replaceVals; if (measOutIsHandle) { auto i64Ty = rewriter.getI64Type(); replaceVals.push_back( - rewriter.create(loc, i64Ty, call.getResult(0))); + cudaq::cc::CastOp::create(rewriter, loc, i64Ty, call.getResult(0))); } else { replaceVals.append(call.getResults().begin(), call.getResults().end()); } @@ -1381,12 +1448,12 @@ struct MeasurementOpPattern : public OpConversionPattern { auto resultAttr = mz->getAttr(cudaq::opt::ResultIndexAttrName); std::int64_t annInt = cast(resultAttr).getInt(); - Value intVal = rewriter.create(loc, annInt, 64); + Value intVal = arith::ConstantIntOp::create(rewriter, loc, annInt, 64); auto resultTy = M::getResultType(rewriter.getContext()); - Value res = rewriter.create(loc, resultTy, intVal); + Value res = 
cudaq::cc::CastOp::create(rewriter, loc, resultTy, intVal); args.push_back(res); auto call = - rewriter.create(loc, TypeRange{}, functionName, args); + func::CallOp::create(rewriter, loc, TypeRange{}, functionName, args); call->setAttr(cudaq::opt::QIRRegisterNameAttr, regNameAttr); // For handle-form callers, materialize the back-cast `Result* -> i64` // here so it dominates downstream uses. The `!discriminateToClassical` @@ -1395,7 +1462,7 @@ struct MeasurementOpPattern : public OpConversionPattern { auto i64Ty = rewriter.getI64Type(); Value handleRes = - measOutIsHandle ? rewriter.create(loc, i64Ty, res) + measOutIsHandle ? cudaq::cc::CastOp::create(rewriter, loc, i64Ty, res) : res; auto cstringGlobal = createGlobalCString(mz, loc, rewriter, regNameAttr.getValue()); @@ -1408,9 +1475,9 @@ struct MeasurementOpPattern : public OpConversionPattern { } auto func = mz->getParentOfType(); if (!func->hasAttr(cudaq::runtime::enableCudaqRun)) { - auto recOut = rewriter.create( - loc, TypeRange{}, cudaq::opt::QIRRecordOutput, - ArrayRef{res, cstringGlobal}); + auto recOut = func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRRecordOutput, + ArrayRef{res, cstringGlobal}); recOut->setAttr(cudaq::opt::ResultIndexAttrName, resultAttr); recOut->setAttr(cudaq::opt::QIRRegisterNameAttr, regNameAttr); } @@ -1440,8 +1507,8 @@ struct ResetOpPattern : public OpConversionPattern { } else { auto loc = reset.getLoc(); auto results = filterArgs(reset, adaptor.getOperands()); - rewriter.create(loc, TypeRange{}, qirFunctionName, - adaptor.getOperands()); + func::CallOp::create(rewriter, loc, TypeRange{}, qirFunctionName, + adaptor.getOperands()); rewriter.replaceOp(reset, results); } return success(); @@ -1458,12 +1525,12 @@ struct ApplyOpTrap : public OpConversionPattern { matchAndRewrite(quake::ApplyOp apply, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto loc = apply.getLoc(); - Value zero = rewriter.create(loc, 0, 64); - rewriter.create(loc, 
TypeRange{}, cudaq::opt::QISTrap, - ValueRange{zero}); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + func::CallOp::create(rewriter, loc, TypeRange{}, cudaq::opt::QISTrap, + ValueRange{zero}); SmallVector values; for (auto r : apply.getResults()) { - Value v = rewriter.create(loc, r.getType()); + Value v = cudaq::cc::PoisonOp::create(rewriter, loc, r.getType()); values.push_back(v); } rewriter.replaceOp(apply, values); @@ -1488,8 +1555,8 @@ struct CallByRefOpRewrite : public OpConversionPattern { if (quake::isQuantumValueType(valarg.getType())) quantumArgs.push_back(qirarg); - auto refCall = rewriter.create( - loc, fn.getFunctionType().getResults(), + auto refCall = func::CallOp::create( + rewriter, loc, fn.getFunctionType().getResults(), adaptor.getCallee().getRootReference().getValue(), adaptor.getArgs()); // Concat the formal results and the quantum arguments to rewrite the uses. @@ -1614,14 +1681,14 @@ struct QuantumGatePattern : public OpConversionPattern { // If this is adjoint, each parameter is negated. 
if (op.getIsAdj()) { for (std::size_t i = 0; i < opParams.size(); ++i) - opParams[i] = rewriter.create(loc, opParams[i]); + opParams[i] = arith::NegFOp::create(rewriter, loc, opParams[i]); if constexpr (std::is_same_v) { std::swap(opParams[0], opParams[1]); auto fltTy = cast(opParams[0].getType()); - Value pi = rewriter.create( - loc, llvm::APFloat{M_PI}, fltTy); - opParams[0] = rewriter.create(loc, opParams[0], pi); - opParams[1] = rewriter.create(loc, opParams[1], pi); + Value pi = arith::ConstantFloatOp::create(rewriter, loc, fltTy, + llvm::APFloat{M_PI}); + opParams[0] = arith::SubFOp::create(rewriter, loc, opParams[0], pi); + opParams[1] = arith::AddFOp::create(rewriter, loc, opParams[1], pi); } else if constexpr (std::is_same_v) { // swap the 2nd and 3rd parameter for correctness std::swap(opParams[1], opParams[2]); @@ -1633,7 +1700,7 @@ struct QuantumGatePattern : public OpConversionPattern { for (std::size_t i = 0; i < opParams.size(); ++i) { if (opParams[i].getType().getIntOrFloatBitWidth() != 64) opParams[i] = - rewriter.create(loc, f64Ty, opParams[i]); + cudaq::cc::CastOp::create(rewriter, loc, f64Ty, opParams[i]); } } @@ -1641,14 +1708,14 @@ struct QuantumGatePattern : public OpConversionPattern { // just add a call and forward the target qubits as needed. 
auto numControls = adaptor.getControls().size(); if (op.getControls().empty() || - conformsToIntendedCall(numControls, op.getControls().front(), op, - qirFunctionName)) { + conformsToIntendedCall(numControls, getInitialType(op, opParams.size()), + op, qirFunctionName)) { SmallVector args{opParams.begin(), opParams.end()}; args.append(adaptor.getControls().begin(), adaptor.getControls().end()); args.append(adaptor.getTargets().begin(), adaptor.getTargets().end()); qirFunctionName = specializeFunctionName(op, qirFunctionName, numControls); - rewriter.create(loc, TypeRange{}, qirFunctionName, args); + func::CallOp::create(rewriter, loc, TypeRange{}, qirFunctionName, args); return forwardOrEraseOp(); } @@ -1665,22 +1732,24 @@ struct QuantumGatePattern : public OpConversionPattern { Type i64Ty = rewriter.getI64Type(); auto ptrNoneTy = M::getLLVMPointerType(rewriter.getContext()); - // Process the controls, sorting them by type. - for (auto pr : llvm::zip(op.getControls(), adaptor.getControls())) { - if (isaVeqArgument(std::get<0>(pr).getType())) { + // Process the controls, sorting them by type. Using the original + // type recorded by QuakeToQIRAPIPrep, since opaque pointers + // make Array* and Qubit* indistinguishable on the live operand. 
+ for (auto [i, val] : llvm::enumerate(adaptor.getControls())) { + Type origCtrlTy = getInitialType(op, opParams.size() + i); + if (isaVeqArgument(origCtrlTy)) { numArrayCtrls++; - auto sizeCall = rewriter.create( - loc, i64Ty, cudaq::opt::QIRArrayGetSize, - ValueRange{std::get<1>(pr)}); + auto sizeCall = func::CallOp::create( + rewriter, loc, i64Ty, cudaq::opt::QIRArrayGetSize, ValueRange{val}); // Arrays are encoded as pairs of arguments: length and Array* opArrCtrls.push_back(sizeCall.getResult(0)); - opArrCtrls.push_back(rewriter.create( - loc, ptrNoneTy, std::get<1>(pr))); + opArrCtrls.push_back( + cudaq::cc::CastOp::create(rewriter, loc, ptrNoneTy, val)); } else { numQubitCtrls++; // Qubits are simply the Qubit** - opQubitCtrls.emplace_back(rewriter.create( - loc, ptrNoneTy, std::get<1>(pr))); + opQubitCtrls.emplace_back( + cudaq::cc::CastOp::create(rewriter, loc, ptrNoneTy, val)); } } @@ -1694,9 +1763,9 @@ struct QuantumGatePattern : public OpConversionPattern { return op.emitError("cannot find " + qirFunctionName); FunctionType qirFunctionTy = funOp.getFunctionType(); auto funCon = - rewriter.create(loc, qirFunctionTy, qirFunctionName); + func::ConstantOp::create(rewriter, loc, qirFunctionTy, qirFunctionName); auto funPtr = - rewriter.create(loc, ptrNoneTy, funCon); + cudaq::cc::FuncToPtrOp::create(rewriter, loc, ptrNoneTy, funCon); // Process the target qubits. auto numTargets = adaptor.getTargets().size(); @@ -1704,18 +1773,18 @@ struct QuantumGatePattern : public OpConversionPattern { return op.emitOpError("quake op must have at least 1 target."); SmallVector opTargs; for (auto t : adaptor.getTargets()) - opTargs.push_back(rewriter.create(loc, ptrNoneTy, t)); + opTargs.push_back(cudaq::cc::CastOp::create(rewriter, loc, ptrNoneTy, t)); // Build the declared arguments for the helper call (5 total). 
SmallVector args; args.emplace_back( - rewriter.create(loc, opParams.size(), 64)); + arith::ConstantIntOp::create(rewriter, loc, opParams.size(), 64)); args.emplace_back( - rewriter.create(loc, numArrayCtrls, 64)); + arith::ConstantIntOp::create(rewriter, loc, numArrayCtrls, 64)); args.emplace_back( - rewriter.create(loc, numQubitCtrls, 64)); + arith::ConstantIntOp::create(rewriter, loc, numQubitCtrls, 64)); args.emplace_back( - rewriter.create(loc, numTargets, 64)); + arith::ConstantIntOp::create(rewriter, loc, numTargets, 64)); args.emplace_back(funPtr); // Finally, append the varargs to the end of the argument list. @@ -1725,8 +1794,9 @@ struct QuantumGatePattern : public OpConversionPattern { args.append(opTargs.begin(), opTargs.end()); // Call the generalized version of the gate invocation. - rewriter.create( - loc, TypeRange{}, cudaq::opt::NVQIRGeneralizedInvokeAny, args); + cudaq::cc::VarargCallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::NVQIRGeneralizedInvokeAny, + args); return forwardOrEraseOp(); } @@ -1742,11 +1812,10 @@ struct QuantumGatePattern : public OpConversionPattern { return isa(ty) || alreadyConverted(ty); } - static bool conformsToIntendedCall(std::size_t numControls, Value ctrl, OP op, - StringRef qirFunctionName) { + static bool conformsToIntendedCall(std::size_t numControls, Type ctrlTy, + OP op, StringRef qirFunctionName) { if (numControls != 1) return false; - auto ctrlTy = ctrl.getType(); auto trivialName = specializeFunctionName(op, qirFunctionName, numControls); const bool nameChanged = trivialName != qirFunctionName; if (nameChanged && !isa(ctrlTy)) @@ -1794,11 +1863,10 @@ struct AllocaOpPattern : public OpConversionPattern { }; struct ReturnOpPattern : public OpConversionPattern { - using Base = OpConversionPattern; - using Base::Base; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(func::ReturnOp op, typename Base::OpAdaptor adaptor, + matchAndRewrite(func::ReturnOp op, OpAdaptor adaptor, 
ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp(op, adaptor.getOperands()); return success(); @@ -1835,7 +1903,7 @@ struct FuncSignaturePattern : public OpConversionPattern { blockArg.setType(newTy); } // Replace the signature. - rewriter.updateRootInPlace(func, [&]() { + rewriter.modifyOpInPlace(func, [&]() { func.setFunctionType(newFuncTy); func->setAttr(FuncIsQIRAPI, rewriter.getUnitAttr()); }); @@ -1863,8 +1931,8 @@ struct CreateLambdaPattern blockArg.setType(argTy); } // Replace the signature. - rewriter.updateRootInPlace(op, - [&]() { op.getSignature().setType(newSigTy); }); + rewriter.modifyOpInPlace(op, + [&]() { op.getSignature().setType(newSigTy); }); return success(); } }; @@ -1986,7 +2054,8 @@ struct CondBranchOpPattern : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp( op, adaptor.getCondition(), adaptor.getTrueDestOperands(), - adaptor.getFalseDestOperands(), op.getTrueDest(), op.getFalseDest()); + adaptor.getFalseDestOperands(), DenseI32ArrayAttr(), op.getTrueDest(), + op.getFalseDest()); return success(); } }; @@ -2040,11 +2109,7 @@ static void commonQuakeHandlingPatterns(RewritePatternSet &patterns, template Type GetLLVMPointerType(MLIRContext *ctx) { - if constexpr (opaquePtr) { - return LLVM::LLVMPointerType::get(ctx); - } else { - return LLVM::LLVMPointerType::get(IntegerType::get(ctx, 8)); - } + return LLVM::LLVMPointerType::get(ctx); } /// The modifier class for the "full QIR" API. 
@@ -2423,7 +2488,7 @@ struct QuakeToQIRAPIPrepPass RewritePatternSet patterns(ctx); QIRAPITypeConverter typeConverter(opaquePtr); cudaq::opt::populateQuakeToCCPrepPatterns(patterns); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) { + if (failed(applyPatternsGreedily(module, std::move(patterns)))) { signalPassFailure(); return; } @@ -2575,10 +2640,11 @@ struct QuakeToQIRAPIPrepPass auto *ctx = module.getContext(); module.walk([&](Operation *op) { - if (!std::any_of(op->getResultTypes().begin(), op->getResultTypes().end(), - quake::isQuantumValueType) || - !std::any_of(op->getOperandTypes().begin(), - op->getOperandTypes().end(), quake::isQuantumValueType)) + if (std::all_of(op->getResultTypes().begin(), op->getResultTypes().end(), + [&](Type ty) { return !quake::isQuantumType(ty); }) && + std::all_of(op->getOperandTypes().begin(), + op->getOperandTypes().end(), + [&](Type ty) { return !quake::isQuantumType(ty); })) return; SmallVector typeAttrs; typeAttrs.reserve(op->getOperands().size()); @@ -2626,7 +2692,7 @@ struct QuakeToQIRAPIFinalPass RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) signalPassFailure(); } }; diff --git a/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp b/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp index 25a3689252c..ae7f05db870 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp @@ -11,18 +11,23 @@ #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/Peephole.h" #include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Todo.h" #include "nlohmann/json.hpp" #include "llvm/ADT/SmallSet.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" -#include 
"mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" +namespace cudaq::opt { +#define GEN_PASS_DEF_QIRTOQIRPROFILE +#define GEN_PASS_DEF_QIRTOQIRPROFILEFUNC +#define GEN_PASS_DEF_QIRTOQIRPROFILEPREP +#include "cudaq/Optimizer/CodeGen/Passes.h.inc" +} // namespace cudaq::opt + #define DEBUG_TYPE "qir-profile" /** @@ -49,7 +54,7 @@ static std::size_t getNumQubits(LLVM::CallOp callOp) { while (defOp && !dyn_cast(defOp)) defOp = defOp->getOperand(0).getDefiningOp(); if (auto constOp = dyn_cast_or_null(defOp)) - return constOp.getValue().cast().getValue().getLimitedValue(); + return cast(constOp.getValue()).getValue().getLimitedValue(); TODO_loc(callOp.getLoc(), "cannot compute number of qubits allocated"); } @@ -64,7 +69,7 @@ static bool isQIRSliceCall(Operation *op) { static std::optional sliceLowerBound(Operation *op) { Value low = op->getOperand(2); if (auto con = low.getDefiningOp()) - return con.getValue().cast().getInt(); + return cast(con.getValue()).getInt(); return {}; } @@ -179,7 +184,7 @@ struct FunctionProfileAnalysis { if (constVal) if (auto incr = constVal->getDefiningOp()) optQb = - allocOffset + incr.getValue().cast().getInt(); + allocOffset + cast(incr.getValue()).getInt(); } } if (optQb) { @@ -189,8 +194,8 @@ struct FunctionProfileAnalysis { auto resIdx = IntegerAttr::get(intTy, data.nResults); callOp->setAttr(resultIndexName, resIdx); auto regName = [&]() -> StringAttr { - if (auto nameAttr = callOp->getAttr(cudaq::opt::QIRRegisterNameAttr) - .dyn_cast_or_null()) + if (auto nameAttr = dyn_cast_if_present( + callOp->getAttr(cudaq::opt::QIRRegisterNameAttr))) return nameAttr; return {}; }(); @@ -219,7 +224,7 @@ struct AddFuncAttribute : public OpRewritePattern { // Add attributes to the function. 
auto iter = infoMap.find(op); assert(iter != infoMap.end()); - rewriter.startRootUpdate(op); + rewriter.startOpModification(op); const auto &info = iter->second; nlohmann::json resultQubitJSON{info.resultQubitVals}; bool isAdaptive = convertTo == "qir-adaptive"; @@ -227,20 +232,18 @@ struct AddFuncAttribute : public OpRewritePattern { auto requiredQubitsStr = std::to_string(info.nQubits); StringRef requiredQubitsStrRef = requiredQubitsStr; - if (auto stringAttr = - op->getAttr(cudaq::opt::qir0_1::RequiredQubitsAttrName) - .dyn_cast_or_null()) + if (auto stringAttr = dyn_cast_if_present( + op->getAttr(cudaq::opt::qir0_1::RequiredQubitsAttrName))) requiredQubitsStrRef = stringAttr; auto requiredResultsStr = std::to_string(info.nResults); StringRef requiredResultsStrRef = requiredResultsStr; - if (auto stringAttr = - op->getAttr(cudaq::opt::qir0_1::RequiredResultsAttrName) - .dyn_cast_or_null()) + if (auto stringAttr = dyn_cast_if_present( + op->getAttr(cudaq::opt::qir0_1::RequiredResultsAttrName))) requiredResultsStrRef = stringAttr; StringRef outputNamesStrRef; std::string resultQubitJSONStr; - if (auto strAttr = op->getAttr(cudaq::opt::QIROutputNamesAttrName) - .dyn_cast_or_null()) { + if (auto strAttr = dyn_cast_if_present( + op->getAttr(cudaq::opt::QIROutputNamesAttrName))) { outputNamesStrRef = strAttr; } else { resultQubitJSONStr = resultQubitJSON.dump(); @@ -271,7 +274,7 @@ struct AddFuncAttribute : public OpRewritePattern { auto builder = cudaq::IRBuilder::atBlockTerminator(&op.getBody().back()); auto loc = op.getBody().back().getTerminator()->getLoc(); - auto resultTy = cudaq::opt::getResultType(rewriter.getContext()); + auto resultTy = cudaq::cg::getLLVMResultType(rewriter.getContext()); auto i64Ty = rewriter.getI64Type(); auto module = op->getParentOfType(); for (auto &iv : info.resultQubitVals) { @@ -282,29 +285,31 @@ struct AddFuncAttribute : public OpRewritePattern { if (isAdaptive) builder.setInsertionPointAfter( 
info.resultOperation.find(iv.first)->getSecond()); - Value idx = builder.create(loc, i64Ty, iv.first); - Value ptr = builder.create(loc, resultTy, idx); + Value idx = LLVM::ConstantOp::create(builder, loc, i64Ty, iv.first); + Value ptr = LLVM::IntToPtrOp::create(builder, loc, resultTy, idx); auto regName = [&]() -> Value { - auto charPtrTy = cudaq::opt::getCharPointerType(builder.getContext()); + auto charPtrTy = + cudaq::cg::getLLVMCharPointerType(builder.getContext()); if (!rec.second.empty()) { // Note: it should be the case that this string literal has already // been added to the IR, so this step does not actually update the // module. auto globl = builder.genCStringLiteralAppendNul(loc, module, rec.second); - auto addrOf = builder.create( - loc, cudaq::opt::factory::getPointerType(globl.getType()), + auto addrOf = LLVM::AddressOfOp::create( + builder, loc, + cudaq::opt::factory::getPointerType(globl.getType()), globl.getName()); - return builder.create(loc, charPtrTy, addrOf); + return LLVM::BitcastOp::create(builder, loc, charPtrTy, addrOf); } - Value zero = builder.create(loc, i64Ty, 0); - return builder.create(loc, charPtrTy, zero); + Value zero = LLVM::ConstantOp::create(builder, loc, i64Ty, 0); + return LLVM::IntToPtrOp::create(builder, loc, charPtrTy, zero); }(); - builder.create(loc, TypeRange{}, - cudaq::opt::QIRRecordOutput, - ValueRange{ptr, regName}); + LLVM::CallOp::create(builder, loc, TypeRange{}, + cudaq::opt::QIRRecordOutput, + ValueRange{ptr, regName}); } - rewriter.finalizeRootUpdate(op); + rewriter.finalizeOpModification(op); return success(); } @@ -326,10 +331,10 @@ struct AddCallAttribute : public OpRewritePattern { auto startIter = info.allocationOffsets.find(op.getOperation()); assert(startIter != info.allocationOffsets.end()); auto startVal = startIter->second; - rewriter.startRootUpdate(op); + rewriter.startOpModification(op); op->setAttr(cudaq::opt::StartingOffsetAttrName, rewriter.getIntegerAttr(rewriter.getI64Type(), startVal)); - 
rewriter.finalizeRootUpdate(op); + rewriter.finalizeOpModification(op); return success(); } @@ -343,7 +348,8 @@ struct AddCallAttribute : public OpRewritePattern { /// calls are bijective with all distinct measurement calls in the original /// function, however the indices used may be renumbered and start at 0. struct QIRToQIRProfileFuncPass - : public cudaq::opt::QIRToQIRProfileFuncBase { + : public cudaq::opt::impl::QIRToQIRProfileFuncBase< + QIRToQIRProfileFuncPass> { using QIRToQIRProfileFuncBase::QIRToQIRProfileFuncBase; explicit QIRToQIRProfileFuncPass(llvm::StringRef convertTo_) @@ -408,15 +414,15 @@ struct ArrayGetElementPtrConv : public OpRewritePattern { if (!call) return failure(); auto loc = op.getLoc(); - if (call.getCallee()->equals(cudaq::opt::QIRArrayGetElementPtr1d)) { + if (call.getCallee() == cudaq::opt::QIRArrayGetElementPtr1d) { auto *alloc = call.getOperand(0).getDefiningOp(); if (!alloc->hasAttr(cudaq::opt::StartingOffsetAttrName)) return failure(); Value disp = call.getOperand(1); - Value off = rewriter.create( - loc, disp.getType(), + Value off = LLVM::ConstantOp::create( + rewriter, loc, disp.getType(), alloc->getAttr(cudaq::opt::StartingOffsetAttrName)); - Value qubit = rewriter.create(loc, off, disp); + Value qubit = LLVM::AddOp::create(rewriter, loc, off, disp); rewriter.replaceOpWithNewOp(op, op.getType(), qubit); return success(); } @@ -429,13 +435,13 @@ struct CallAlloc : public OpRewritePattern { LogicalResult matchAndRewrite(LLVM::CallOp call, PatternRewriter &rewriter) const override { - if (!call.getCallee()->equals(cudaq::opt::QIRQubitAllocate)) + if (call.getCallee() != cudaq::opt::QIRQubitAllocate) return failure(); if (!call->hasAttr(cudaq::opt::StartingOffsetAttrName)) return failure(); auto loc = call.getLoc(); - Value qubit = rewriter.create( - loc, rewriter.getI64Type(), + Value qubit = LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI64Type(), call->getAttr(cudaq::opt::StartingOffsetAttrName)); auto resTy = 
call.getResult().getType(); rewriter.replaceOpWithNewOp(call, resTy, qubit); @@ -454,10 +460,10 @@ struct ZCtrlOneTargetToCZ : public OpRewritePattern { PatternRewriter &rewriter) const override { ValueRange args(call.getArgOperands()); if (args.size() == 4 && call.getCallee() && - call.getCallee()->equals(cudaq::opt::NVQIRInvokeWithControlBits)) { + call.getCallee() == cudaq::opt::NVQIRInvokeWithControlBits) { if (auto addrOf = dyn_cast_or_null( args[1].getDefiningOp())) { - if (addrOf.getGlobalName().startswith( + if (addrOf.getGlobalName().starts_with( std::string(cudaq::opt::QIRQISPrefix) + "z__ctl")) { rewriter.replaceOpWithNewOp( call, TypeRange{}, cudaq::opt::QIRCZ, args.drop_front(2)); @@ -476,7 +482,7 @@ struct ZCtrlOneTargetToCZ : public OpRewritePattern { /// DAGs in the IR and replace them to meet the requirements of the base /// profile. The patterns are defined in Peephole.td. struct QIRToQIRProfileQIRPass - : public cudaq::opt::QIRToQIRProfileBase { + : public cudaq::opt::impl::QIRToQIRProfileBase { explicit QIRToQIRProfileQIRPass() = default; /// @brief Construct pass @@ -499,7 +505,7 @@ struct QIRToQIRProfileQIRPass XCtrlOneTargetToCNot, ZCtrlOneTargetToCZ>(context); if (convertTo.getValue() == "qir-adaptive") patterns.insert(context); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if (failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After QIR profile:\n" << *op << '\n'); } @@ -530,7 +536,8 @@ static constexpr std::array measurementFunctionNames{ cudaq::opt::QIRMeasureToRegister}; struct QIRProfilePreparationPass - : public cudaq::opt::QIRToQIRProfilePrepBase { + : public cudaq::opt::impl::QIRToQIRProfilePrepBase< + QIRProfilePreparationPass> { void runOnOperation() override { ModuleOp module = getOperation(); @@ -539,27 +546,30 @@ struct QIRProfilePreparationPass // Add cnot declaration as it may be referenced after peepholes run. 
cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRCnot, LLVM::LLVMVoidType::get(ctx), - {cudaq::opt::getQubitType(ctx), cudaq::opt::getQubitType(ctx)}, module); + {cudaq::cg::getLLVMQubitType(ctx), cudaq::cg::getLLVMQubitType(ctx)}, + module); // Add cz declaration as it may be referenced after peepholes run. cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRCZ, LLVM::LLVMVoidType::get(ctx), - {cudaq::opt::getQubitType(ctx), cudaq::opt::getQubitType(ctx)}, module); + {cudaq::cg::getLLVMQubitType(ctx), cudaq::cg::getLLVMQubitType(ctx)}, + module); // Add measure_body as it has a different signature than measure. cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRMeasureBody, LLVM::LLVMVoidType::get(ctx), - {cudaq::opt::getQubitType(ctx), cudaq::opt::getResultType(ctx)}, + {cudaq::cg::getLLVMQubitType(ctx), cudaq::cg::getLLVMResultType(ctx)}, module); cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::qir0_1::ReadResultBody, IntegerType::get(ctx, 1), - {cudaq::opt::getResultType(ctx)}, module); + {cudaq::cg::getLLVMResultType(ctx)}, module); // Add record functions for any measurements. 
cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRRecordOutput, LLVM::LLVMVoidType::get(ctx), - {cudaq::opt::getResultType(ctx), cudaq::opt::getCharPointerType(ctx)}, + {cudaq::cg::getLLVMResultType(ctx), + cudaq::cg::getLLVMCharPointerType(ctx)}, module); // Add functions `__quantum__qis__*__body` for all functions matching diff --git a/lib/Optimizer/CodeGen/DelayMeasurements.cpp b/lib/Optimizer/CodeGen/DelayMeasurements.cpp index 1760e115484..1883c3b429e 100644 --- a/lib/Optimizer/CodeGen/DelayMeasurements.cpp +++ b/lib/Optimizer/CodeGen/DelayMeasurements.cpp @@ -8,7 +8,6 @@ #include "PassDetails.h" #include "cudaq/Optimizer/CodeGen/Passes.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" namespace cudaq::opt { #define GEN_PASS_DEF_DELAYMEASUREMENTS diff --git a/lib/Optimizer/CodeGen/EliminateDeadHeapCopy.cpp b/lib/Optimizer/CodeGen/EliminateDeadHeapCopy.cpp index 981a45b0f48..a2275f66698 100644 --- a/lib/Optimizer/CodeGen/EliminateDeadHeapCopy.cpp +++ b/lib/Optimizer/CodeGen/EliminateDeadHeapCopy.cpp @@ -8,15 +8,14 @@ #include "PassDetails.h" #include "cudaq/Optimizer/CodeGen/Passes.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" - -#define DEBUG_TYPE "eliminate-dead-heap-copy" namespace cudaq::opt { #define GEN_PASS_DEF_ELIMINATEDEADHEAPCOPY #include "cudaq/Optimizer/CodeGen/Passes.h.inc" } // namespace cudaq::opt +#define DEBUG_TYPE "eliminate-dead-heap-copy" + using namespace mlir; namespace { diff --git a/lib/Optimizer/CodeGen/OptUtils.cpp b/lib/Optimizer/CodeGen/OptUtils.cpp index 74ba16ec161..a8f2674dcce 100644 --- a/lib/Optimizer/CodeGen/OptUtils.cpp +++ b/lib/Optimizer/CodeGen/OptUtils.cpp @@ -18,13 +18,11 @@ //===----------------------------------------------------------------------===// #include "cudaq/Optimizer/CodeGen/OptUtils.h" - #include "llvm/IR/Module.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/Error.h" #include "llvm/Target/TargetMachine.h" -#include using 
namespace llvm; @@ -54,6 +52,7 @@ static std::optional mapToLevel(unsigned optLevel, } return std::nullopt; } + // Create and return a lambda that uses LLVM pass manager builder to set up // optimizations based on the given level. std::function diff --git a/lib/Optimizer/CodeGen/PassDetails.h b/lib/Optimizer/CodeGen/PassDetails.h index e0fb0d4e4fc..038736d792f 100644 --- a/lib/Optimizer/CodeGen/PassDetails.h +++ b/lib/Optimizer/CodeGen/PassDetails.h @@ -10,9 +10,15 @@ #include "cudaq/Optimizer/CodeGen/CodeGenDialect.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/OpenACC/OpenACC.h" @@ -22,7 +28,7 @@ namespace cudaq::opt { -#define GEN_PASS_CLASSES -#include "cudaq/Optimizer/CodeGen/Passes.h.inc" +// Note: Individual pass implementations should define their specific pass +// using #define GEN_PASS_DEF_ before including Passes.h.inc } // namespace cudaq::opt diff --git a/lib/Optimizer/CodeGen/Passes.cpp b/lib/Optimizer/CodeGen/Passes.cpp index 8ff6c53c2d1..1bdf2f24363 100644 --- a/lib/Optimizer/CodeGen/Passes.cpp +++ b/lib/Optimizer/CodeGen/Passes.cpp @@ -15,88 +15,72 @@ using namespace mlir; static void addAnyonPPipeline(OpPassManager &pm) { - using namespace cudaq::opt; - std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { "h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "z(1)", }; - BasisConversionOptions options; - options.basis = basis; 
pm.addPass(createBasisConversion(options)); } static void addAnyonCPipeline(OpPassManager &pm) { - using namespace cudaq::opt; - std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { "h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "x(1)", }; - BasisConversionOptions options; - options.basis = basis; pm.addPass(createBasisConversion(options)); } static void addOQCPipeline(OpPassManager &pm) { - using namespace cudaq::opt; - std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { // TODO: make this our native gate set "h", "s", "t", "r1", "rx", "ry", "rz", "x", "y", "z", "x(1)", }; - BasisConversionOptions options; - options.basis = basis; pm.addPass(createBasisConversion(options)); } static void addQCIPipeline(OpPassManager &pm) { - using namespace cudaq::opt; // Note: QCI's basis gate set is "sx", "rz", "cz", but QCI currently has // a transpiler converting all other gates to that basis. // We use the gate set below so we can translate all gates to QIR. 
- std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { "h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "x(1)", }; - BasisConversionOptions options; - options.basis = basis; pm.addPass(createBasisConversion(options)); } static void addQuantinuumPipeline(OpPassManager &pm) { - using namespace cudaq::opt; - std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { "h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "x(1)", }; - BasisConversionOptions options; - options.basis = basis; pm.addPass(createBasisConversion(options)); } static void addIQMPipeline(OpPassManager &pm) { - using namespace cudaq::opt; - std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { "phased_rx", "z(1)", }; - BasisConversionOptions options; - options.basis = basis; pm.addPass(createBasisConversion(options)); } static void addIonQPipeline(OpPassManager &pm) { - using namespace cudaq::opt; - std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { "h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "x(1)", // TODO set to ms, gpi, gpi2 }; - BasisConversionOptions options; - options.basis = basis; pm.addPass(createBasisConversion(options)); } static void addFermioniqPipeline(OpPassManager &pm) { - using namespace cudaq::opt; - std::string basis[] = { + cudaq::opt::BasisConversionOptions options; + options.basis = { "h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "x(1)", }; - BasisConversionOptions options; - options.basis = basis; pm.addPass(createBasisConversion(options)); } diff --git a/lib/Optimizer/CodeGen/PeepholePatterns.inc b/lib/Optimizer/CodeGen/PeepholePatterns.inc index 3e408af8375..67beb82e772 100644 --- a/lib/Optimizer/CodeGen/PeepholePatterns.inc +++ b/lib/Optimizer/CodeGen/PeepholePatterns.inc @@ -27,9 +27,10 @@ struct XCtrlOneTargetToCNot : public OpRewritePattern { return failure(); auto *ctx = rewriter.getContext(); auto funcSymbol = 
FlatSymbolRefAttr::get(ctx, cudaq::opt::QIRCnot); - rewriter.replaceOpWithNewOp( - call, TypeRange{}, funcSymbol, args.drop_front(2), - call.getFastmathFlagsAttr(), call.getBranchWeightsAttr()); + LLVM::CallOp::Properties properties = call.getProperties(); + properties.setCallee(funcSymbol); + rewriter.replaceOpWithNewOp(call, TypeRange{}, + args.drop_front(2), properties); return success(); } }; @@ -69,13 +70,14 @@ struct CalleeConv : public OpRewritePattern { if (!callee) return failure(); if (!needsToBeRenamed(*callee) || - callee->startswith(cudaq::opt::QIRMeasure)) + callee->starts_with(cudaq::opt::QIRMeasure)) return failure(); auto *ctx = rewriter.getContext(); auto symbol = FlatSymbolRefAttr::get(ctx, callee->str() + "__body"); - rewriter.replaceOpWithNewOp( - call, TypeRange{}, symbol, call.getOperands(), - call.getFastmathFlagsAttr(), call.getBranchWeightsAttr()); + LLVM::CallOp::Properties properties = call.getProperties(); + properties.setCallee(symbol); + rewriter.replaceOpWithNewOp(call, TypeRange{}, + call.getOperands(), properties); return success(); } }; @@ -119,7 +121,7 @@ struct EraseArrayAlloc : public OpRewritePattern { return failure(); auto *ctx = rewriter.getContext(); rewriter.replaceOpWithNewOp(call, - cudaq::opt::getArrayType(ctx)); + cudaq::cg::getLLVMArrayType(ctx)); return success(); } }; @@ -224,7 +226,7 @@ struct LoadMeasureResult : public OpRewritePattern { if (bitcast.getType() != cudaq::opt::factory::getPointerType(IntegerType::get(ctx, 1))) return failure(); - if (inttoptr.getType() != cudaq::opt::getResultType(ctx)) + if (inttoptr.getType() != cudaq::cg::getLLVMResultType(ctx)) return failure(); if (!isa(conint.getValue())) return failure(); diff --git a/lib/Optimizer/CodeGen/Pipelines.cpp b/lib/Optimizer/CodeGen/Pipelines.cpp index 377b52b7797..29efd35699f 100644 --- a/lib/Optimizer/CodeGen/Pipelines.cpp +++ b/lib/Optimizer/CodeGen/Pipelines.cpp @@ -162,7 +162,9 @@ void cudaq::opt::createPipelineTransformsForPythonToOpenQASM( 
pm.addNestedPass(createCSEPass()); pm.addNestedPass(createMultiControlDecomposition()); pm.addPass(createDecomposition( - {.basis = {"h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "x(1)"}})); + {.basis = {"h", "s", "t", "rx", "ry", "rz", "x", "y", "z", "x(1)"}, + .disabledPatterns = {}, + .enabledPatterns = {}})); pm.addPass(createQuakeToCCPrep()); pm.addNestedPass(createCanonicalizerPass()); pm.addNestedPass(createExpandControlVeqs()); diff --git a/lib/Optimizer/CodeGen/QirInsertArrayRecord.cpp b/lib/Optimizer/CodeGen/QirInsertArrayRecord.cpp index 77e4f5b77ff..1c7d19ae0c5 100644 --- a/lib/Optimizer/CodeGen/QirInsertArrayRecord.cpp +++ b/lib/Optimizer/CodeGen/QirInsertArrayRecord.cpp @@ -43,14 +43,14 @@ static LogicalResult insertArrayRecordingCall(OpBuilder &builder, std::string labelStr = "array"; auto strLitTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get( builder.getContext(), builder.getI8Type(), labelStr.size() + 1)); - Value lit = builder.create( - loc, strLitTy, builder.getStringAttr(labelStr)); + Value lit = cudaq::cc::CreateStringLiteralOp::create( + builder, loc, strLitTy, builder.getStringAttr(labelStr)); auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - Value label = builder.create(loc, i8PtrTy, lit); - Value size = builder.create(loc, resultCount, 64); - builder.create(loc, TypeRange{}, - cudaq::opt::QIRArrayRecordOutput, - ArrayRef{size, label}); + Value label = cudaq::cc::CastOp::create(builder, loc, i8PtrTy, lit); + Value size = arith::ConstantIntOp::create(builder, loc, resultCount, 64); + func::CallOp::create(builder, loc, TypeRange{}, + cudaq::opt::QIRArrayRecordOutput, + ArrayRef{size, label}); return success(); } diff --git a/lib/Optimizer/CodeGen/QuakeToCodegen.cpp b/lib/Optimizer/CodeGen/QuakeToCodegen.cpp index e9503b31559..e3d2308eaea 100644 --- a/lib/Optimizer/CodeGen/QuakeToCodegen.cpp +++ b/lib/Optimizer/CodeGen/QuakeToCodegen.cpp @@ -8,14 +8,12 @@ #include "QuakeToCodegen.h" #include "CodeGenOps.h" 
+#include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" -#include "mlir/Dialect/Complex/IR/Complex.h" using namespace mlir; @@ -55,11 +53,11 @@ class ExpandComplexCast : public OpRewritePattern { return failure(); auto loc = castOp.getLoc(); auto ty = cast(castOp.getValue().getType()).getElementType(); - Value rePart = rewriter.create(loc, ty, castOp.getValue()); - Value imPart = rewriter.create(loc, ty, castOp.getValue()); + Value rePart = complex::ReOp::create(rewriter, loc, ty, castOp.getValue()); + Value imPart = complex::ImOp::create(rewriter, loc, ty, castOp.getValue()); auto eleTy = complexTy.getElementType(); - auto reCast = rewriter.create(loc, eleTy, rePart); - auto imCast = rewriter.create(loc, eleTy, imPart); + auto reCast = cudaq::cc::CastOp::create(rewriter, loc, eleTy, rePart); + auto imCast = cudaq::cc::CastOp::create(rewriter, loc, eleTy, imPart); rewriter.replaceOpWithNewOp(castOp, complexTy, reCast, imCast); return success(); @@ -108,7 +106,7 @@ class CreateStateOpPattern : public OpRewritePattern { auto stateTy = quake::StateType::get(ctx); auto statePtrTy = cudaq::cc::PointerType::get(stateTy); auto i8PtrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); - auto cast = rewriter.create(loc, i8PtrTy, buffer); + auto cast = cudaq::cc::CastOp::create(rewriter, loc, i8PtrTy, buffer); rewriter.replaceOpWithNewOp( createStateOp, statePtrTy, createStateFunc, ValueRange{cast, size}); @@ -130,7 +128,7 @@ class DeleteStateOpPattern : public OpRewritePattern { auto result = irBuilder.loadIntrinsic(module, cudaq::deleteCudaqState); assert(succeeded(result) && "loading intrinsic should never fail"); - 
rewriter.replaceOpWithNewOp(deleteStateOp, std::nullopt, + rewriter.replaceOpWithNewOp(deleteStateOp, mlir::TypeRange{}, cudaq::deleteCudaqState, mlir::ValueRange{state}); return success(); diff --git a/lib/Optimizer/CodeGen/QuakeToExecMgr.cpp b/lib/Optimizer/CodeGen/QuakeToExecMgr.cpp index 8dbeac4659c..8c8dc564234 100644 --- a/lib/Optimizer/CodeGen/QuakeToExecMgr.cpp +++ b/lib/Optimizer/CodeGen/QuakeToExecMgr.cpp @@ -7,11 +7,10 @@ ******************************************************************************/ #include "cudaq/Optimizer/CodeGen/QuakeToExecMgr.h" +#include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/CodeGen/CudaqFunctionNames.h" #include "cudaq/Optimizer/CodeGen/Passes.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" @@ -40,47 +39,49 @@ static Value packQubitSpans(Location loc, ConversionPatternRewriter &rewriter, auto qspanTy = cudaq::opt::getCudaqQubitSpanType(rewriter.getContext()); Value newspan; if (operands.empty()) { - newspan = rewriter.create(loc, qspanTy); - auto zero = rewriter.create(loc, 0, 64); - auto nullPtrVal = rewriter.create( - loc, cudaq::opt::getCudaqQubitType(rewriter.getContext()), zero); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMWriteToSpan, - ValueRange{newspan, nullPtrVal, zero}); + newspan = cudaq::cc::AllocaOp::create(rewriter, loc, qspanTy); + auto zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + auto nullPtrVal = cudaq::cc::CastOp::create( + rewriter, loc, cudaq::opt::getCudaqQubitType(rewriter.getContext()), + zero); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMWriteToSpan, + ValueRange{newspan, nullPtrVal, zero}); } else if (operands.size() == 1) { // Nothing to concatenate in this case. 
newspan = operands[0]; } else { - newspan = rewriter.create(loc, qspanTy); + newspan = cudaq::cc::AllocaOp::create(rewriter, loc, qspanTy); // Loop over all arguments and count the number of qubits. - Value zero = rewriter.create(loc, 0, 64); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); Value sum = zero; auto i64Ty = rewriter.getI64Type(); auto ptrI64Ty = cudaq::cc::PointerType::get(i64Ty); for (auto v : operands) { - auto sizePtr = rewriter.create( - loc, ptrI64Ty, v, ArrayRef{1}); - auto size = rewriter.create(loc, sizePtr); - sum = rewriter.create(loc, sum, size); + auto sizePtr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrI64Ty, v, ArrayRef{1}); + auto size = cudaq::cc::LoadOp::create(rewriter, loc, sizePtr); + sum = arith::AddIOp::create(rewriter, loc, sum, size); } // Allocate a fresh buffer. - auto newBuffer = rewriter.create(loc, i64Ty, sum); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMWriteToSpan, - ValueRange{newspan, newBuffer, sum}); + auto newBuffer = cudaq::cc::AllocaOp::create(rewriter, loc, i64Ty, sum); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMWriteToSpan, + ValueRange{newspan, newBuffer, sum}); // Copy the i64 values to the new buffer. 
sum = zero; Value size = zero; for (auto v : operands) { - auto dest = rewriter.create( - loc, ptrI64Ty, newBuffer, ArrayRef{sum}); - auto sizePtr = rewriter.create( - loc, ptrI64Ty, v, ArrayRef{1}); - size = rewriter.create(loc, sizePtr); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMConcatSpan, - ValueRange{dest, v, size}); - sum = rewriter.create(loc, sum, size); + auto dest = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrI64Ty, newBuffer, + ArrayRef{sum}); + auto sizePtr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrI64Ty, v, ArrayRef{1}); + size = cudaq::cc::LoadOp::create(rewriter, loc, sizePtr); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMConcatSpan, + ValueRange{dest, v, size}); + sum = arith::AddIOp::create(rewriter, loc, sum, size); } } return newspan; @@ -107,19 +108,20 @@ class AllocaOpRewrite : public OpConversionPattern { auto loc = alloca.getLoc(); auto i64Ty = rewriter.getI64Type(); auto qspanTy = cudaq::opt::getCudaqQubitSpanType(rewriter.getContext()); - Value qspan = rewriter.create(loc, qspanTy); + Value qspan = cudaq::cc::AllocaOp::create(rewriter, loc, qspanTy); if (auto resultType = dyn_cast(alloca.getType())) { - auto one = rewriter.create(loc, 1, 64); - Value buffer = rewriter.create(loc, i64Ty, one); - auto call = rewriter.create( - loc, i64Ty, cudaq::opt::CudaqEMAllocate, ValueRange{}); + auto one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); + Value buffer = cudaq::cc::AllocaOp::create(rewriter, loc, i64Ty, one); + auto call = func::CallOp::create( + rewriter, loc, i64Ty, cudaq::opt::CudaqEMAllocate, ValueRange{}); auto ptrI64Ty = cudaq::cc::PointerType::get(i64Ty); - auto toAddr = rewriter.create( - loc, ptrI64Ty, buffer, ArrayRef{0}); - rewriter.create(loc, call.getResult(0), toAddr); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMWriteToSpan, - ValueRange{qspan, buffer, one}); + auto toAddr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrI64Ty, 
buffer, + ArrayRef{0}); + cudaq::cc::StoreOp::create(rewriter, loc, call.getResult(0), toAddr); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMWriteToSpan, + ValueRange{qspan, buffer, one}); } else { Value sizeOperand; if (adaptor.getOperands().empty()) { @@ -127,25 +129,26 @@ class AllocaOpRewrite : public OpConversionPattern { assert(type.hasSpecifiedSize() && "veq must have a constant size"); auto constantSize = type.getSize(); sizeOperand = - rewriter.create(loc, constantSize, 64); + arith::ConstantIntOp::create(rewriter, loc, constantSize, 64); } else if (auto intSizeTy = dyn_cast(adaptor.getSize().getType())) { sizeOperand = adaptor.getSize(); if (intSizeTy.getWidth() != 64) - sizeOperand = rewriter.create( - loc, i64Ty, sizeOperand, cudaq::cc::CastOpMode::Unsigned); + sizeOperand = + cudaq::cc::CastOp::create(rewriter, loc, i64Ty, sizeOperand, + cudaq::cc::CastOpMode::Unsigned); } if (!sizeOperand) return failure(); Value buffer = - rewriter.create(loc, i64Ty, sizeOperand); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMWriteToSpan, - ValueRange{qspan, buffer, sizeOperand}); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMAllocateVeq, - ValueRange{qspan, sizeOperand}); + cudaq::cc::AllocaOp::create(rewriter, loc, i64Ty, sizeOperand); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMWriteToSpan, + ValueRange{qspan, buffer, sizeOperand}); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMAllocateVeq, + ValueRange{qspan, sizeOperand}); } rewriter.replaceOp(alloca, qspan); return success(); @@ -160,7 +163,7 @@ class DeallocOpRewrite : public OpConversionPattern { matchAndRewrite(quake::DeallocOp dealloc, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp( - dealloc, std::nullopt, cudaq::opt::CudaqEMReturn, + dealloc, mlir::TypeRange{}, cudaq::opt::CudaqEMReturn, ValueRange{adaptor.getReference()}); return 
success(); } @@ -205,8 +208,8 @@ class ExtractRefOpRewrite : public OpConversionPattern { auto loc = extract.getLoc(); auto offset = [&]() -> Value { if (extract.hasConstantIndex()) - return rewriter.create( - loc, extract.getConstantIndex(), 64); + return arith::ConstantIntOp::create(rewriter, loc, + extract.getConstantIndex(), 64); return adaptor.getIndex(); }(); @@ -218,18 +221,19 @@ class ExtractRefOpRewrite : public OpConversionPattern { auto ptrptrTy = cudaq::cc::PointerType::get(ptrArrTy); auto qspan = adaptor.getVeq(); - auto qspanDataPtr = rewriter.create( - loc, ptrptrTy, qspan, ArrayRef{0}); - auto qspanData = rewriter.create(loc, qspanDataPtr); - auto buffer = rewriter.create( - loc, ptrI64Ty, qspanData, ArrayRef{offset}); + auto qspanDataPtr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrptrTy, qspan, ArrayRef{0}); + auto qspanData = cudaq::cc::LoadOp::create(rewriter, loc, qspanDataPtr); + auto buffer = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrI64Ty, qspanData, + ArrayRef{offset}); auto qspanTy = cudaq::opt::getCudaqQubitSpanType(rewriter.getContext()); - Value newspan = rewriter.create(loc, qspanTy); - auto one = rewriter.create(loc, 1, 64); - auto buf1 = rewriter.create(loc, ptrArrTy, buffer); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMWriteToSpan, - ValueRange{newspan, buf1, one}); + Value newspan = cudaq::cc::AllocaOp::create(rewriter, loc, qspanTy); + auto one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); + auto buf1 = cudaq::cc::CastOp::create(rewriter, loc, ptrArrTy, buffer); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMWriteToSpan, + ValueRange{newspan, buf1, one}); rewriter.replaceOp(extract, newspan); return success(); } @@ -248,35 +252,37 @@ class SubveqOpRewrite : public OpConversionPattern { auto loc = subveq.getLoc(); auto up = [&]() -> Value { if (!adaptor.getUpper()) - return rewriter.create(loc, adaptor.getRawUpper(), - 64); + return 
arith::ConstantIntOp::create(rewriter, loc, + adaptor.getRawUpper(), 64); return adaptor.getUpper(); }(); auto lo = [&]() -> Value { if (!adaptor.getLower()) - return rewriter.create(loc, adaptor.getRawLower(), - 64); + return arith::ConstantIntOp::create(rewriter, loc, + adaptor.getRawLower(), 64); return adaptor.getLower(); }(); - auto diff = rewriter.create(loc, up, lo); - auto one = rewriter.create(loc, 1, 64); - auto length = rewriter.create(loc, diff, one); + auto diff = arith::SubIOp::create(rewriter, loc, up, lo); + auto one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); + auto length = arith::AddIOp::create(rewriter, loc, diff, one); // Compute the pointer to the first element in the subveq and build a new // array type. auto i64Ty = rewriter.getI64Type(); auto ptrI64Ty = cudaq::cc::PointerType::get(i64Ty); auto ptrTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(i64Ty)); auto ptrptrTy = cudaq::cc::PointerType::get(ptrTy); - auto qspanDataPtr = rewriter.create( - loc, ptrptrTy, adaptor.getVeq(), ArrayRef{0}); - auto qspanData = rewriter.create(loc, qspanDataPtr); - auto buffer = rewriter.create( - loc, ptrI64Ty, qspanData, ArrayRef{lo}); + auto qspanDataPtr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrptrTy, adaptor.getVeq(), + ArrayRef{0}); + auto qspanData = cudaq::cc::LoadOp::create(rewriter, loc, qspanDataPtr); + auto buffer = + cudaq::cc::ComputePtrOp::create(rewriter, loc, ptrI64Ty, qspanData, + ArrayRef{lo}); auto qspanTy = cudaq::opt::getCudaqQubitSpanType(rewriter.getContext()); - Value newspan = rewriter.create(loc, qspanTy); - rewriter.create(loc, std::nullopt, - cudaq::opt::CudaqEMWriteToSpan, - ValueRange{newspan, buffer, length}); + Value newspan = cudaq::cc::AllocaOp::create(rewriter, loc, qspanTy); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::CudaqEMWriteToSpan, + ValueRange{newspan, buffer, length}); rewriter.replaceOp(subveq, newspan); return success(); } @@ -289,8 +295,9 @@ class 
ResetRewrite : public OpConversionPattern { LogicalResult matchAndRewrite(quake::ResetOp resetOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp( - resetOp, std::nullopt, cudaq::opt::CudaqEMReset, adaptor.getOperands()); + rewriter.replaceOpWithNewOp(resetOp, mlir::TypeRange{}, + cudaq::opt::CudaqEMReset, + adaptor.getOperands()); return success(); } }; @@ -311,27 +318,30 @@ class GenericRewrite : public OpConversionPattern { auto i8Ty = rewriter.getI8Type(); auto ptrI8Ty = cudaq::cc::PointerType::get(i8Ty); auto regTy = cudaq::cc::PointerType::get(opName.getType()); - auto addr = rewriter.create(loc, regTy, - opName.getSymName()); - auto opString = rewriter.create(loc, ptrI8Ty, addr); + auto addr = cudaq::cc::AddressOfOp::create(rewriter, loc, regTy, + opName.getSymName()); + auto opString = cudaq::cc::CastOp::create(rewriter, loc, ptrI8Ty, addr); auto paramSize = adaptor.getParameters().size(); - Value numParams = rewriter.create(loc, paramSize, 64); + Value numParams = + arith::ConstantIntOp::create(rewriter, loc, paramSize, 64); auto f64Ty = rewriter.getF64Type(); auto arrF64Ty = cudaq::cc::ArrayType::get(f64Ty); auto ptrParamTy = cudaq::cc::PointerType::get(arrF64Ty); auto ptrF64Ty = cudaq::cc::PointerType::get(f64Ty); auto params = [&]() -> Value { if (paramSize == 0) { - auto zero = rewriter.create(loc, paramSize, 64); - return rewriter.create(loc, ptrParamTy, zero); + auto zero = arith::ConstantIntOp::create(rewriter, loc, paramSize, 64); + return cudaq::cc::CastOp::create(rewriter, loc, ptrParamTy, zero); } - auto buffer = rewriter.create(loc, f64Ty, numParams); + auto buffer = + cudaq::cc::AllocaOp::create(rewriter, loc, f64Ty, numParams); for (auto iter : llvm::enumerate(adaptor.getParameters())) { std::int32_t i = iter.index(); auto p = iter.value(); - auto ptr = rewriter.create( - loc, ptrF64Ty, buffer, ArrayRef{i}); - rewriter.create(loc, p, ptr); + auto ptr = cudaq::cc::ComputePtrOp::create( + 
rewriter, loc, ptrF64Ty, buffer, + ArrayRef{i}); + cudaq::cc::StoreOp::create(rewriter, loc, p, ptr); } return buffer; }(); @@ -339,11 +349,11 @@ class GenericRewrite : public OpConversionPattern { auto targets = packQubitSpans(loc, rewriter, adaptor.getTargets()); auto isAdj = [&]() -> Value { if (qop.isAdj()) - return rewriter.create(loc, 1, 1); - return rewriter.create(loc, 0, 1); + return arith::ConstantIntOp::create(rewriter, loc, 1, 1); + return arith::ConstantIntOp::create(rewriter, loc, 0, 1); }(); rewriter.template replaceOpWithNewOp( - qop, std::nullopt, cudaq::opt::CudaqEMApply, + qop, mlir::TypeRange{}, cudaq::opt::CudaqEMApply, ValueRange{opString, numParams, params, controls, targets, isAdj}); return success(); } @@ -392,9 +402,9 @@ class MzOpRewrite : public OpConversionPattern { auto i8Ty = rewriter.getI8Type(); auto ptrI8Ty = cudaq::cc::PointerType::get(i8Ty); auto regTy = cudaq::cc::PointerType::get(regName.getType()); - auto addr = rewriter.create(loc, regTy, - regName.getSymName()); - auto nameAddr = rewriter.create(loc, ptrI8Ty, addr); + auto addr = cudaq::cc::AddressOfOp::create(rewriter, loc, regTy, + regName.getSymName()); + auto nameAddr = cudaq::cc::CastOp::create(rewriter, loc, ptrI8Ty, addr); auto i32Ty = rewriter.getI32Type(); rewriter.replaceOpWithNewOp( mzOp, i32Ty, cudaq::opt::CudaqEMMeasure, @@ -410,7 +420,7 @@ class MxToMzRewrite : public OpRewritePattern { LogicalResult matchAndRewrite(quake::MxOp mx, PatternRewriter &rewriter) const override { - rewriter.create(mx.getLoc(), mx.getTargets()); + quake::HOp::create(rewriter, mx.getLoc(), mx.getTargets()); rewriter.replaceOpWithNewOp( mx, mx.getResultTypes(), mx.getTargets(), mx.getRegisterNameAttr()); return success(); @@ -424,9 +434,9 @@ class MyToMzRewrite : public OpRewritePattern { LogicalResult matchAndRewrite(quake::MyOp my, PatternRewriter &rewriter) const override { - rewriter.create(my.getLoc(), true, ValueRange{}, ValueRange{}, - my.getTargets()); - 
rewriter.create(my.getLoc(), my.getTargets()); + quake::SOp::create(rewriter, my.getLoc(), true, ValueRange{}, ValueRange{}, + my.getTargets()); + quake::HOp::create(rewriter, my.getLoc(), my.getTargets()); rewriter.replaceOpWithNewOp( my, my.getResultTypes(), my.getTargets(), my.getRegisterNameAttr()); return success(); @@ -443,8 +453,9 @@ class VeqSizeOpRewrite : public OpConversionPattern { auto loc = vecsize->getLoc(); auto i64Ty = rewriter.getI64Type(); auto ptrI64Ty = cudaq::cc::PointerType::get(i64Ty); - auto sizeptr = rewriter.create( - loc, ptrI64Ty, adaptor.getVeq(), ArrayRef{1}); + auto sizeptr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrI64Ty, adaptor.getVeq(), + ArrayRef{1}); rewriter.replaceOpWithNewOp(vecsize, sizeptr); return success(); } diff --git a/lib/Optimizer/CodeGen/QuakeToLLVM.cpp b/lib/Optimizer/CodeGen/QuakeToLLVM.cpp index 32d845d2b6a..43ae3e03a73 100644 --- a/lib/Optimizer/CodeGen/QuakeToLLVM.cpp +++ b/lib/Optimizer/CodeGen/QuakeToLLVM.cpp @@ -8,14 +8,13 @@ #include "cudaq/Optimizer/CodeGen/QuakeToLLVM.h" #include "CodeGenOps.h" +#include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h" #include "cudaq/Optimizer/CodeGen/QuakeToExecMgr.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" @@ -46,19 +45,19 @@ class AllocaOpRewrite : public ConvertOpToLLVMPattern { dyn_cast_if_present(alloca.getResult().getType())) { StringRef qirQubitAllocate = cudaq::opt::QIRQubitAllocate; - auto qubitType = cudaq::opt::getQubitType(context); + auto qubitType = cudaq::cg::getLLVMQubitType(context); FlatSymbolRefAttr symbolRef = 
cudaq::opt::factory::createLLVMFunctionSymbol( qirQubitAllocate, qubitType, {}, parentModule); rewriter.replaceOpWithNewOp(alloca, qubitType, symbolRef, - std::nullopt); + ValueRange{}); return success(); } // Create a QIR call to allocate the qubits. StringRef qir_qubit_array_allocate = cudaq::opt::QIRArrayQubitAllocateArray; - auto array_qbit_type = cudaq::opt::getArrayType(context); + auto array_qbit_type = cudaq::cg::getLLVMArrayType(context); FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( qir_qubit_array_allocate, array_qbit_type, {rewriter.getI64Type()}, parentModule); @@ -70,13 +69,12 @@ class AllocaOpRewrite : public ConvertOpToLLVMPattern { auto type = cast(alloca.getResult().getType()); auto constantSize = type.getSize(); sizeOperand = - rewriter.create(loc, constantSize, 64); + arith::ConstantIntOp::create(rewriter, loc, constantSize, 64); } else { sizeOperand = adaptor.getOperands().front(); - if (cast(sizeOperand.getType()).getWidth() < 64) { - sizeOperand = rewriter.create(loc, rewriter.getI64Type(), - sizeOperand); - } + if (cast(sizeOperand.getType()).getWidth() < 64) + sizeOperand = LLVM::ZExtOp::create(rewriter, loc, rewriter.getI64Type(), + sizeOperand); } // Replace the AllocaOp with the QIR call. 
@@ -98,7 +96,7 @@ class QmemRAIIOpRewrite ConversionPatternRewriter &rewriter) const override { auto loc = raii->getLoc(); auto parentModule = raii->getParentOfType(); - auto array_qbit_type = cudaq::opt::getArrayType(rewriter.getContext()); + auto array_qbit_type = cudaq::cg::getLLVMArrayType(rewriter.getContext()); // Get the CC Pointer for the state auto ccState = adaptor.getInitState(); @@ -139,14 +137,14 @@ class QmemRAIIOpRewrite sizeOperand = allocSize; auto sizeTy = cast(sizeOperand.getType()); if (sizeTy.getWidth() < 64) - sizeOperand = rewriter.create(loc, i64Ty, sizeOperand); + sizeOperand = LLVM::ZExtOp::create(rewriter, loc, i64Ty, sizeOperand); else if (sizeTy.getWidth() > 64) - sizeOperand = rewriter.create(loc, i64Ty, sizeOperand); + sizeOperand = LLVM::TruncOp::create(rewriter, loc, i64Ty, sizeOperand); } else { auto type = cast(allocTy); auto constantSize = type.getSize(); sizeOperand = - rewriter.create(loc, constantSize, 64); + arith::ConstantIntOp::create(rewriter, loc, constantSize, 64); } // Create QIR allocation with initializer function. 
@@ -159,7 +157,7 @@ class QmemRAIIOpRewrite // Call the allocation function Value castedInitState = - rewriter.create(loc, ptrTy, ccState); + LLVM::BitcastOp::create(rewriter, loc, ptrTy, ccState); rewriter.replaceOpWithNewOp( raii, array_qbit_type, raiiSymbolRef, ArrayRef{sizeOperand, castedInitState}); @@ -185,10 +183,10 @@ class DeallocOpRewrite : public ConvertOpToLLVMPattern { Type operandType, qType = dealloc.getOperand().getType(); if (isa(qType)) { qirQuantumDeallocateFunc = cudaq::opt::QIRArrayQubitReleaseArray; - operandType = cudaq::opt::getArrayType(context); + operandType = cudaq::cg::getLLVMArrayType(context); } else { qirQuantumDeallocateFunc = cudaq::opt::QIRArrayQubitReleaseQubit; - operandType = cudaq::opt::getQubitType(context); + operandType = cudaq::cg::getLLVMQubitType(context); } FlatSymbolRefAttr deallocSymbolRef = @@ -220,7 +218,7 @@ class ConcatOpRewrite : public ConvertOpToLLVMPattern { auto parentModule = concat->getParentOfType(); auto context = parentModule->getContext(); - auto arrType = cudaq::opt::getArrayType(context); + auto arrType = cudaq::cg::getLLVMArrayType(context); auto loc = concat.getLoc(); StringRef qirArrayConcatName = cudaq::opt::QIRArrayConcatArray; @@ -233,7 +231,7 @@ class ConcatOpRewrite : public ConvertOpToLLVMPattern { return success(); } - auto qirArrayTy = cudaq::opt::getArrayType(context); + auto qirArrayTy = cudaq::cg::getLLVMArrayType(context); auto i8PtrTy = cudaq::opt::factory::getPointerType(context); FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRArrayCreateArray, qirArrayTy, @@ -242,23 +240,24 @@ class ConcatOpRewrite : public ConvertOpToLLVMPattern { cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRArrayGetElementPtr1d, i8PtrTy, {qirArrayTy, rewriter.getIntegerType(64)}, parentModule); - Value zero = rewriter.create(loc, 0, 64); - Value one = rewriter.create(loc, 1, 64); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + Value 
one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); // FIXME: 8 bytes is assumed to be the sizeof(char*) on the target machine. - Value eight = rewriter.create(loc, 8, 32); + Value eight = arith::ConstantIntOp::create(rewriter, loc, 8, 32); // Function to convert a QIR Qubit value to an Array value. auto wrapQubitInArray = [&](Value v) -> Value { - if (v.getType() != cudaq::opt::getQubitType(context)) + if (v.getType() != cudaq::cg::getLLVMQubitType(context)) return v; - auto createCall = rewriter.create( - loc, qirArrayTy, symbolRef, ArrayRef{eight, one}); + auto createCall = LLVM::CallOp::create( + rewriter, loc, qirArrayTy, symbolRef, ArrayRef{eight, one}); auto result = createCall.getResult(); - auto call = rewriter.create(loc, i8PtrTy, getSymbolRef, - ArrayRef{result, zero}); - Value pointer = rewriter.create( - loc, cudaq::opt::factory::getPointerType(i8PtrTy), call.getResult()); - auto cast = rewriter.create(loc, i8PtrTy, v); - rewriter.create(loc, cast, pointer); + auto call = LLVM::CallOp::create(rewriter, loc, i8PtrTy, getSymbolRef, + ArrayRef{result, zero}); + Value pointer = LLVM::BitcastOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(i8PtrTy), + call.getResult()); + auto cast = LLVM::BitcastOp::create(rewriter, loc, i8PtrTy, v); + LLVM::StoreOp::create(rewriter, loc, cast, pointer); return result; }; @@ -267,8 +266,8 @@ class ConcatOpRewrite : public ConvertOpToLLVMPattern { auto frontArr = wrapQubitInArray(adaptor.getOperands().front()); for (auto oper : adaptor.getOperands().drop_front(1)) { auto backArr = wrapQubitInArray(oper); - auto glue = rewriter.create( - loc, qirArrayTy, concatFunc, ArrayRef{frontArr, backArr}); + auto glue = LLVM::CallOp::create(rewriter, loc, qirArrayTy, concatFunc, + ArrayRef{frontArr, backArr}); frontArr = glue.getResult(); } rewriter.replaceOp(concat, frontArr); @@ -305,9 +304,8 @@ class ExtractQubitOpRewrite auto qir_array_get_element_ptr_1d = cudaq::opt::QIRArrayGetElementPtr1d; - auto 
array_qbit_type = cudaq::opt::getArrayType(context); - auto qbit_element_ptr_type = - LLVM::LLVMPointerType::get(rewriter.getI8Type()); + auto array_qbit_type = cudaq::cg::getLLVMArrayType(context); + auto qbit_element_ptr_type = cudaq::opt::factory::getPointerType(context); FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( qir_array_get_element_ptr_1d, qbit_element_ptr_type, @@ -316,25 +314,25 @@ class ExtractQubitOpRewrite Value idx_operand; auto i64Ty = rewriter.getI64Type(); if (extract.hasConstantIndex()) { - idx_operand = rewriter.create( - loc, extract.getConstantIndex(), i64Ty); + idx_operand = arith::ConstantIntOp::create( + rewriter, loc, extract.getConstantIndex(), 64); } else { idx_operand = adaptor.getOperands()[1]; if (idx_operand.getType().isIntOrFloat() && cast(idx_operand.getType()).getWidth() < 64) - idx_operand = rewriter.create(loc, i64Ty, idx_operand); + idx_operand = LLVM::ZExtOp::create(rewriter, loc, i64Ty, idx_operand); } - auto get_qbit_qir_call = rewriter.create( - loc, qbit_element_ptr_type, symbolRef, + auto get_qbit_qir_call = LLVM::CallOp::create( + rewriter, loc, qbit_element_ptr_type, symbolRef, llvm::ArrayRef({adaptor.getOperands().front(), idx_operand})); - auto bitcast = rewriter.create( - loc, LLVM::LLVMPointerType::get(cudaq::opt::getQubitType(context)), + auto bitcast = LLVM::BitcastOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), get_qbit_qir_call.getResult()); rewriter.replaceOpWithNewOp( - extract, cudaq::opt::getQubitType(context), bitcast.getResult()); + extract, cudaq::cg::getLLVMQubitType(context), bitcast.getResult()); return success(); } }; @@ -364,11 +362,12 @@ class MakeStruqOpPattern : public ConvertOpToLLVMPattern { auto loc = mkStruq.getLoc(); auto *ctx = rewriter.getContext(); auto toTy = getTypeConverter()->convertType(mkStruq.getType()); - Value result = rewriter.create(loc, toTy); + Value result = LLVM::UndefOp::create(rewriter, loc, toTy); std::int64_t 
count = 0; for (auto op : adaptor.getOperands()) { auto off = DenseI64ArrayAttr::get(ctx, ArrayRef{count}); - result = rewriter.create(loc, toTy, result, op, off); + result = + LLVM::InsertValueOp::create(rewriter, loc, toTy, result, op, off); count++; } rewriter.replaceOp(mkStruq, result); @@ -387,7 +386,7 @@ class SubveqOpRewrite : public ConvertOpToLLVMPattern { auto parentModule = subveq->getParentOfType(); auto *context = parentModule->getContext(); constexpr auto rtSubveqFuncName = cudaq::opt::QIRArraySlice; - auto arrayTy = cudaq::opt::getArrayType(context); + auto arrayTy = cudaq::cg::getLLVMArrayType(context); auto resultTy = arrayTy; auto i32Ty = rewriter.getI32Type(); @@ -398,27 +397,27 @@ class SubveqOpRewrite : public ConvertOpToLLVMPattern { auto lowArg = [&]() -> Value { if (!adaptor.getLower()) - return rewriter.create(loc, adaptor.getRawLower(), - 64); + return arith::ConstantIntOp::create(rewriter, loc, + adaptor.getRawLower(), 64); return adaptor.getLower(); }(); auto highArg = [&]() -> Value { if (!adaptor.getUpper()) - return rewriter.create(loc, adaptor.getRawUpper(), - 64); + return arith::ConstantIntOp::create(rewriter, loc, + adaptor.getRawUpper(), 64); return adaptor.getUpper(); }(); auto extend = [&](Value &v) -> Value { if (isa(v.getType()) && cast(v.getType()).getWidth() < 64) - return rewriter.create(loc, i64Ty, v); + return LLVM::ZExtOp::create(rewriter, loc, i64Ty, v); return v; }; lowArg = extend(lowArg); highArg = extend(highArg); Value inArr = adaptor.getOperands()[0]; - auto one32 = rewriter.create(loc, 1, i32Ty); - auto one64 = rewriter.create(loc, 1, i64Ty); + auto one32 = arith::ConstantIntOp::create(rewriter, loc, 1, 32); + auto one64 = arith::ConstantIntOp::create(rewriter, loc, 1, 64); rewriter.replaceOpWithNewOp( subveq, resultTy, symbolRef, ValueRange{inArr, one32, lowArg, one64, highArg}); @@ -442,7 +441,7 @@ class ResetRewrite : public ConvertOpToLLVMPattern { auto qirFunctionName = cudaq::opt::QIRQISPrefix + instName; 
// Create the qubit pointer type - auto qirQubitPointerType = cudaq::opt::getQubitType(context); + auto qirQubitPointerType = cudaq::cg::getLLVMQubitType(context); // Get the function reference for the reset function auto qirFunctionSymbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( @@ -471,7 +470,7 @@ class ExpPauliRewrite : public ConvertOpToLLVMPattern { auto qirFunctionName = qirQisPrefix + "exp_pauli"; FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( qirFunctionName, /*return type=*/LLVM::LLVMVoidType::get(context), - {rewriter.getF64Type(), cudaq::opt::getArrayType(context), + {rewriter.getF64Type(), cudaq::cg::getLLVMArrayType(context), cudaq::opt::factory::getPointerType(context)}, parentModule); SmallVector operands = adaptor.getOperands(); @@ -484,35 +483,27 @@ class ExpPauliRewrite : public ConvertOpToLLVMPattern { auto pauliConst = builder.genCStringLiteralAppendNul( loc, parentModule, *instOp.getPauliLiteral()); // Create a pauli reference and make it the last operand. 
- operands.push_back(rewriter.create( - loc, cudaq::opt::factory::getPointerType(pauliConst.getType()), + operands.push_back(LLVM::AddressOfOp::create( + rewriter, loc, + cudaq::opt::factory::getPointerType(pauliConst.getType()), pauliConst.getSymName())); } auto pauliWord = operands.back(); - if (auto ptrTy = dyn_cast(pauliWord.getType())) { - // Make sure we have the right types to extract the - // length of the string literal - auto ptrEleTy = ptrTy.getElementType(); - auto innerArrTy = dyn_cast(ptrEleTy); - if (!innerArrTy) - return instOp.emitError( - "exp_pauli string literal expected to be ptr."); - - // Get the number of elements in the provided string literal - auto numElements = innerArrTy.getNumElements() - 1; + if (isa(pauliWord.getType())) { + // With opaque pointers we get the string length from the literal + auto numElements = static_cast(instOp.getPauliLiteral()->size()); // Remove the old operand operands.pop_back(); // We must create the {i8*, i64} struct from the string literal - SmallVector structTys{ - LLVM::LLVMPointerType::get(rewriter.getI8Type()), - rewriter.getI64Type()}; + auto ptrTy = cudaq::opt::factory::getPointerType(context); + SmallVector structTys{ptrTy, rewriter.getI64Type()}; auto structTy = LLVM::LLVMStructType::getLiteral(context, structTys); // Allocate the char span struct - Value alloca = cudaq::opt::factory::createLLVMTemporary( - loc, rewriter, LLVM::LLVMPointerType::get(structTy)); + Value alloca = + cudaq::opt::factory::createLLVMTemporary(loc, rewriter, structTy); // We'll need these constants auto zero = cudaq::opt::factory::genLlvmI64Constant(loc, rewriter, 0); @@ -522,22 +513,21 @@ class ExpPauliRewrite : public ConvertOpToLLVMPattern { // Set the string literal data auto charPtrTy = cudaq::opt::factory::getPointerType(context); - auto strPtrTy = LLVM::LLVMPointerType::get(charPtrTy); - auto strPtr = rewriter.create(loc, strPtrTy, alloca, - ValueRange{zero, zero}); + auto strPtr = LLVM::GEPOp::create(rewriter, loc, 
charPtrTy, structTy, + alloca, ValueRange{zero, zero}); auto castedPauli = - rewriter.create(loc, charPtrTy, pauliWord); - rewriter.create(loc, castedPauli, strPtr); + LLVM::BitcastOp::create(rewriter, loc, charPtrTy, pauliWord); + LLVM::StoreOp::create(rewriter, loc, castedPauli, strPtr); // Set the integer length - auto intPtr = rewriter.create( - loc, LLVM::LLVMPointerType::get(rewriter.getI64Type()), alloca, - ValueRange{zero, one}); - rewriter.create(loc, size, intPtr); + auto i64PtrTy = cudaq::opt::factory::getPointerType(context); + auto intPtr = LLVM::GEPOp::create(rewriter, loc, i64PtrTy, structTy, + alloca, ValueRange{zero, one}); + LLVM::StoreOp::create(rewriter, loc, size, intPtr); // Cast to raw opaque pointer auto castedStore = - rewriter.create(loc, charPtrTy, alloca); + LLVM::BitcastOp::create(rewriter, loc, charPtrTy, alloca); operands.push_back(castedStore); rewriter.replaceOpWithNewOp(instOp, TypeRange{}, symbolRef, operands); @@ -548,10 +538,10 @@ class ExpPauliRewrite : public ConvertOpToLLVMPattern { // Allocate a stack slot for it and store what we have to that pointer, // pass the pointer to NVQIR Value alloca = cudaq::opt::factory::createLLVMTemporary( - loc, rewriter, LLVM::LLVMPointerType::get(pauliWord.getType())); - rewriter.create(loc, pauliWord, alloca); - auto castedPauli = rewriter.create( - loc, cudaq::opt::factory::getPointerType(context), alloca); + loc, rewriter, cudaq::opt::factory::getPointerType(context)); + LLVM::StoreOp::create(rewriter, loc, pauliWord, alloca); + auto castedPauli = LLVM::BitcastOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), alloca); operands.pop_back(); operands.push_back(castedPauli); rewriter.replaceOpWithNewOp(instOp, TypeRange{}, symbolRef, @@ -588,8 +578,8 @@ class ConvertOpWithControls : public ConvertOpToLLVMPattern { auto qirFunctionName = qirQisPrefix + instName + "__ctl"; // Useful types we'll need - auto qirArrayType = cudaq::opt::getArrayType(context); - auto 
qirQubitPointerType = cudaq::opt::getQubitType(context); + auto qirArrayType = cudaq::cg::getLLVMArrayType(context); + auto qirQubitPointerType = cudaq::cg::getLLVMQubitType(context); auto i64Type = rewriter.getI64Type(); // __quantum__qis__NAME__ctl(Array*, Qubit*) Type @@ -599,9 +589,6 @@ class ConvertOpWithControls : public ConvertOpToLLVMPattern { return failure(); if (numTargetOperands == 2) argTys.push_back(qirQubitPointerType); - auto instOpQISFunctionType = - LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(context), argTys); - // Get the function pointer for the ctrl operation auto qirFunctionSymbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( qirFunctionName, LLVM::LLVMVoidType::get(context), argTys, @@ -622,18 +609,20 @@ class ConvertOpWithControls : public ConvertOpToLLVMPattern { // function. FlatSymbolRefAttr applyMultiControlFunction; SmallVector args; - Value ctrlOpPointer = rewriter.create( - loc, LLVM::LLVMPointerType::get(instOpQISFunctionType), + Value ctrlOpPointer = LLVM::AddressOfOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), qirFunctionSymbolRef); Value numControlOperands = - rewriter.create(loc, i64Type, numControls); + arith::ConstantIntOp::create(rewriter, loc, numControls, 64); args.push_back(numControlOperands); // Check if all controls are qubit types, if so retain existing - // functionality. + // functionality. With opaque pointers, both qubit (RefType) and array + // (VeqType) convert to the same !llvm.ptr type, so we must check the + // original quake types to distinguish them. 
auto allControlsAreQubits = [&]() { - for (auto c : adaptor.getControls()) - if (c.getType() != qirQubitPointerType) + for (auto c : instOp.getControls()) + if (!isa(c.getType())) return false; return true; }(); @@ -644,8 +633,8 @@ class ConvertOpWithControls : public ConvertOpToLLVMPattern { applyMultiControlFunction = cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::NVQIRInvokeWithControlBits, LLVM::LLVMVoidType::get(context), - {i64Type, LLVM::LLVMPointerType::get(instOpQISFunctionType)}, - parentModule, true); + {i64Type, cudaq::opt::factory::getPointerType(context)}, parentModule, + true); } else { // Otherwise use the general function, which can handle registers of // qubits and multiple target qubits. Get symbol for the @@ -653,8 +642,8 @@ class ConvertOpWithControls : public ConvertOpToLLVMPattern { applyMultiControlFunction = cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::NVQIRInvokeWithControlRegisterOrBits, LLVM::LLVMVoidType::get(context), - {i64Type, LLVM::LLVMPointerType::get(i64Type), i64Type, - LLVM::LLVMPointerType::get(instOpQISFunctionType)}, + {i64Type, cudaq::opt::factory::getPointerType(context), i64Type, + cudaq::opt::factory::getPointerType(context)}, parentModule, true); // The total number of control qubits may be more than the number of @@ -667,17 +656,26 @@ class ConvertOpWithControls : public ConvertOpToLLVMPattern { // and $0$ otherwise. Value isArrayAndLengthArr = cudaq::opt::factory::packIsArrayAndLengthArray( - loc, rewriter, parentModule, numControls, adaptor.getControls()); + loc, rewriter, parentModule, numControls, adaptor.getControls(), + instOp.getControls()); args.push_back(isArrayAndLengthArr); args.push_back( - rewriter.create(loc, i64Type, numTargetOperands)); + arith::ConstantIntOp::create(rewriter, loc, numTargetOperands, 64)); } args.push_back(ctrlOpPointer); args.append(instOperands.begin(), instOperands.end()); // Call our utility function. 
- rewriter.replaceOpWithNewOp(instOp, TypeRange{}, - applyMultiControlFunction, args); + // For vararg calls, we need to set the var_callee_type attribute. + TypeAttr varCalleeType; + if (auto fn = parentModule.template lookupSymbol( + applyMultiControlFunction.getLeafReference())) { + varCalleeType = TypeAttr::get(fn.getFunctionType()); + } + auto callOp = rewriter.replaceOpWithNewOp( + instOp, TypeRange{}, applyMultiControlFunction, args); + if (varCalleeType) + callOp.setVarCalleeTypeAttr(varCalleeType); return success(); } @@ -710,7 +708,7 @@ class OneTargetRewrite : public ConvertOpWithControls { qirQisPrefix + instName + (instOp.getIsAdj() ? "__adj" : ""); FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( qirFunctionName, /*return type=*/LLVM::LLVMVoidType::get(context), - {cudaq::opt::getQubitType(context)}, parentModule); + {cudaq::cg::getLLVMQubitType(context)}, parentModule); rewriter.replaceOpWithNewOp(instOp, TypeRange{}, symbolRef, adaptor.getOperands()); return success(); @@ -738,18 +736,18 @@ class OneTargetOneParamRewrite : public ConvertOpToLLVMPattern { std::string qirQisPrefix = cudaq::opt::QIRQISPrefix; auto qirFunctionName = qirQisPrefix + instName; - auto qubitIndexType = cudaq::opt::getQubitType(context); - auto qubitArrayType = cudaq::opt::getArrayType(context); - auto paramType = FloatType::getF64(context); + auto qubitIndexType = cudaq::cg::getLLVMQubitType(context); + auto qubitArrayType = cudaq::cg::getLLVMArrayType(context); + auto paramType = rewriter.getF64Type(); SmallVector funcArgs; auto castToDouble = [&](Value v) { if (v.getType().getIntOrFloatBitWidth() < 64) - v = rewriter.create(loc, rewriter.getF64Type(), v); + v = arith::ExtFOp::create(rewriter, loc, rewriter.getF64Type(), v); return v; }; Value val = instOp.getIsAdj() - ? rewriter.create(loc, instOperands[0]) + ? 
arith::NegFOp::create(rewriter, loc, instOperands[0]) : instOperands[0]; funcArgs.push_back(castToDouble(val)); @@ -771,10 +769,6 @@ class OneTargetOneParamRewrite : public ConvertOpToLLVMPattern { qirFunctionName += "__ctl"; // __quantum__qis__NAME__ctl(double, Array*, Qubit*) Type - auto instOpQISFunctionType = LLVM::LLVMFunctionType::get( - LLVM::LLVMVoidType::get(context), - {paramType, qubitArrayType, qubitIndexType}); - // Get function pointer to ctrl operation FlatSymbolRefAttr instSymbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( @@ -802,8 +796,9 @@ class OneTargetOneParamRewrite : public ConvertOpToLLVMPattern { // The remaining scenarios are best handled with the // invokeRotationWithControlQubits function. - Value ctrlOpPointer = rewriter.create( - loc, LLVM::LLVMPointerType::get(instOpQISFunctionType), instSymbolRef); + Value ctrlOpPointer = LLVM::AddressOfOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), + instSymbolRef); // Get symbol for // void invokeRotationWithControlQubits(double param, const std::size_t @@ -814,14 +809,15 @@ class OneTargetOneParamRewrite : public ConvertOpToLLVMPattern { cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::NVQIRInvokeRotationWithControlBits, LLVM::LLVMVoidType::get(context), - {paramType, i64Type, LLVM::LLVMPointerType::get(i64Type), - LLVM::LLVMPointerType::get(instOpQISFunctionType)}, + {paramType, i64Type, cudaq::opt::factory::getPointerType(context), + cudaq::opt::factory::getPointerType(context)}, parentModule, true); // Create an integer array where the kth element is N if the kth // control operand is a veq, and 0 otherwise. 
Value isArrayAndLengthArr = cudaq::opt::factory::packIsArrayAndLengthArray( - loc, rewriter, parentModule, numControls, adaptor.getControls()); + loc, rewriter, parentModule, numControls, adaptor.getControls(), + instOp.getControls()); funcArgs.push_back( cudaq::opt::factory::genLlvmI64Constant(loc, rewriter, numControls)); @@ -831,8 +827,16 @@ class OneTargetOneParamRewrite : public ConvertOpToLLVMPattern { funcArgs.append(adaptor.getTargets().begin(), adaptor.getTargets().end()); // Call our utility function. - rewriter.replaceOpWithNewOp( + // For vararg calls, we need to set the var_callee_type attribute. + TypeAttr varCalleeType1; + if (auto fn = parentModule.template lookupSymbol( + applyMultiControlFunction.getLeafReference())) { + varCalleeType1 = TypeAttr::get(fn.getFunctionType()); + } + auto callOp1 = rewriter.replaceOpWithNewOp( instOp, TypeRange{}, applyMultiControlFunction, funcArgs); + if (varCalleeType1) + callOp1.setVarCalleeTypeAttr(varCalleeType1); return success(); } @@ -856,9 +860,9 @@ class OneTargetTwoParamRewrite : public ConvertOpToLLVMPattern { auto qirFunctionName = std::string(cudaq::opt::QIRQISPrefix) + instName; SmallVector tmpArgTypes; - auto qubitIndexType = cudaq::opt::getQubitType(context); + auto qubitIndexType = cudaq::cg::getLLVMQubitType(context); - auto paramType = FloatType::getF64(context); + auto paramType = rewriter.getF64Type(); tmpArgTypes.push_back(paramType); tmpArgTypes.push_back(paramType); tmpArgTypes.push_back(qubitIndexType); @@ -870,14 +874,14 @@ class OneTargetTwoParamRewrite : public ConvertOpToLLVMPattern { SmallVector funcArgs; auto castToDouble = [&](Value v) { if (v.getType().getIntOrFloatBitWidth() < 64) - v = rewriter.create(loc, rewriter.getF64Type(), v); + v = arith::ExtFOp::create(rewriter, loc, rewriter.getF64Type(), v); return v; }; Value v = adaptor.getOperands()[0]; - v = instOp.getIsAdj() ? rewriter.create(loc, v) : v; + v = instOp.getIsAdj() ? 
arith::NegFOp::create(rewriter, loc, v) : v; funcArgs.push_back(castToDouble(v)); v = adaptor.getOperands()[1]; - v = instOp.getIsAdj() ? rewriter.create(loc, v) : v; + v = instOp.getIsAdj() ? arith::NegFOp::create(rewriter, loc, v) : v; funcArgs.push_back(castToDouble(v)); // TODO: What about the control qubits? @@ -915,20 +919,20 @@ class OneTargetThreeParamRewrite : public ConvertOpToLLVMPattern { std::string qirQisPrefix = cudaq::opt::QIRQISPrefix; auto qirFunctionName = qirQisPrefix + instName; - auto qubitIndexType = cudaq::opt::getQubitType(context); - auto qubitArrayType = cudaq::opt::getArrayType(context); - auto paramType = FloatType::getF64(context); + auto qubitIndexType = cudaq::cg::getLLVMQubitType(context); + auto qubitArrayType = cudaq::cg::getLLVMArrayType(context); + auto paramType = rewriter.getF64Type(); SmallVector funcArgs; auto castToDouble = [&](Value v) { if (v.getType().getIntOrFloatBitWidth() < 64) - v = rewriter.create(loc, rewriter.getF64Type(), v); + v = arith::ExtFOp::create(rewriter, loc, rewriter.getF64Type(), v); return v; }; // 3 parameters for (int i = 0; i < 3; i++) { Value val = instOp.getIsAdj() - ? rewriter.create(loc, instOperands[i]) + ? arith::NegFOp::create(rewriter, loc, instOperands[i]) : instOperands[i]; funcArgs.push_back(castToDouble(val)); } @@ -951,10 +955,6 @@ class OneTargetThreeParamRewrite : public ConvertOpToLLVMPattern { qirFunctionName += "__ctl"; // __quantum__qis__u3__ctl(double, double, double, Array*, Qubit*) Type - auto instOpQISFunctionType = LLVM::LLVMFunctionType::get( - LLVM::LLVMVoidType::get(context), - {paramType, paramType, paramType, qubitArrayType, qubitIndexType}); - // Get function pointer to ctrl operation FlatSymbolRefAttr instSymbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( @@ -983,8 +983,9 @@ class OneTargetThreeParamRewrite : public ConvertOpToLLVMPattern { // The remaining scenarios are best handled with the // invokeU3RotationWithControlQubits function. 
- Value ctrlOpPointer = rewriter.create( - loc, LLVM::LLVMPointerType::get(instOpQISFunctionType), instSymbolRef); + Value ctrlOpPointer = LLVM::AddressOfOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), + instSymbolRef); // Get symbol for void invokeU3RotationWithControlQubits(double theta, // double phi, double lambda, const std::size_t numControlOperands, i64* @@ -996,14 +997,15 @@ class OneTargetThreeParamRewrite : public ConvertOpToLLVMPattern { cudaq::opt::NVQIRInvokeU3RotationWithControlBits, LLVM::LLVMVoidType::get(context), {paramType, paramType, paramType, i64Type, - LLVM::LLVMPointerType::get(i64Type), - LLVM::LLVMPointerType::get(instOpQISFunctionType)}, + cudaq::opt::factory::getPointerType(context), + cudaq::opt::factory::getPointerType(context)}, parentModule, true); // Create an integer array where the kth element is N if the kth // control operand is a veq, and 0 otherwise. Value isArrayAndLengthArr = cudaq::opt::factory::packIsArrayAndLengthArray( - loc, rewriter, parentModule, numControls, adaptor.getControls()); + loc, rewriter, parentModule, numControls, adaptor.getControls(), + instOp.getControls()); funcArgs.push_back( cudaq::opt::factory::genLlvmI64Constant(loc, rewriter, numControls)); @@ -1013,8 +1015,16 @@ class OneTargetThreeParamRewrite : public ConvertOpToLLVMPattern { funcArgs.append(adaptor.getTargets().begin(), adaptor.getTargets().end()); // Call our utility function. - rewriter.replaceOpWithNewOp( + // For vararg calls, we need to set the var_callee_type attribute. 
+ TypeAttr varCalleeType2; + if (auto fn = parentModule.template lookupSymbol( + applyMultiControlFunction.getLeafReference())) { + varCalleeType2 = TypeAttr::get(fn.getFunctionType()); + } + auto callOp2 = rewriter.replaceOpWithNewOp( instOp, TypeRange{}, applyMultiControlFunction, funcArgs); + if (varCalleeType2) + callOp2.setVarCalleeTypeAttr(varCalleeType2); return success(); } @@ -1041,7 +1051,7 @@ class TwoTargetRewrite : public ConvertOpWithControls { auto context = parentModule->getContext(); auto qirFunctionName = std::string(cudaq::opt::QIRQISPrefix) + instName; - auto qubitIndexType = cudaq::opt::getQubitType(context); + auto qubitIndexType = cudaq::cg::getLLVMQubitType(context); SmallVector tmpArgTypes = {qubitIndexType, qubitIndexType}; FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( @@ -1082,7 +1092,7 @@ class MeasureRewrite : public ConvertOpToLLVMPattern { std::string qFunctionName = cudaq::opt::QIRMeasure; Attribute regName = measure.getRegisterNameAttr(); - std::vector funcTypes{cudaq::opt::getQubitType(context)}; + std::vector funcTypes{cudaq::cg::getLLVMQubitType(context)}; std::vector args{adaptor.getOperands().front()}; bool appendName; @@ -1090,7 +1100,7 @@ class MeasureRewrite : public ConvertOpToLLVMPattern { // Change the function name qFunctionName += "__to__register"; // Append a string type argument - funcTypes.push_back(LLVM::LLVMPointerType::get(rewriter.getI8Type())); + funcTypes.push_back(cudaq::opt::factory::getPointerType(context)); appendName = true; } else { // If no register name is supplied, make one up. 
Zero pad the counter so @@ -1126,28 +1136,30 @@ class MeasureRewrite : public ConvertOpToLLVMPattern { rewriter.restoreInsertionPoint(insertPoint); // Get the string address and bit cast - auto regNameRef = rewriter.create( - loc, cudaq::opt::factory::getPointerType(regNameGlobal.getType()), + auto regNameRef = LLVM::AddressOfOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), regNameGlobal.getSymName()); - auto castedRegNameRef = rewriter.create( - loc, cudaq::opt::factory::getPointerType(context), regNameRef); + auto castedRegNameRef = LLVM::BitcastOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), + regNameRef); // Append to the args list if (appendName) args.push_back(castedRegNameRef); FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( - qFunctionName, cudaq::opt::getResultType(context), + qFunctionName, cudaq::cg::getLLVMResultType(context), llvm::ArrayRef(funcTypes), parentModule); - auto callOp = rewriter.create( - loc, cudaq::opt::getResultType(context), symbolRef, ValueRange{args}); + auto callOp = LLVM::CallOp::create(rewriter, loc, + cudaq::cg::getLLVMResultType(context), + symbolRef, ArrayRef(args)); if (regName) callOp->setAttr("registerName", regName); auto i1Ty = rewriter.getI1Type(); - auto i1PtrTy = LLVM::LLVMPointerType::get(i1Ty); + auto i1PtrTy = cudaq::opt::factory::getPointerType(context); auto cast = - rewriter.create(loc, i1PtrTy, callOp.getResult()); + LLVM::BitcastOp::create(rewriter, loc, i1PtrTy, callOp.getResult()); rewriter.replaceOpWithNewOp(measure, i1Ty, cast); return success(); @@ -1168,10 +1180,10 @@ class GetVeqSizeOpRewrite : public OpConversionPattern { auto symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( qFunctionName, rewriter.getI64Type(), - {cudaq::opt::getArrayType(context)}, parentModule); + {cudaq::cg::getLLVMArrayType(context)}, parentModule); - auto c = rewriter.create(loc, rewriter.getI64Type(), - symbolRef, adaptor.getOperands()); + 
auto c = LLVM::CallOp::create(rewriter, loc, rewriter.getI64Type(), + symbolRef, adaptor.getOperands()); vecsize->getResult(0).replaceAllUsesWith(c->getResult(0)); rewriter.eraseOp(vecsize); return success(); @@ -1200,8 +1212,9 @@ class ReturnBitRewrite : public OpConversionPattern { // If we are returning a llvm.ptr then we've really // been asked to return a bit, set that up here - if (ret.getNumOperands() == 1 && adaptor.getOperands().front().getType() == - cudaq::opt::getResultType(context)) { + if (ret.getNumOperands() == 1 && + adaptor.getOperands().front().getType() == + cudaq::cg::getLLVMResultType(context)) { // Bitcast the produced value, which corresponds to the value in // ret.operands()[0], from llvm.ptr to llvm.ptr. There is a @@ -1210,13 +1223,13 @@ class ReturnBitRewrite : public OpConversionPattern { // be a call to __quantum__qis__mz(Qubit*) and that in the LLVM dialect, // functions always have a single result, this should be fine. If things // change, we will need to update this. - auto bitcast = rewriter.create( - loc, LLVM::LLVMPointerType::get(rewriter.getI1Type()), + auto bitcast = LLVM::BitcastOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), adaptor.getOperands().front()); // Load the bool - auto loadBit = rewriter.create(loc, rewriter.getI1Type(), - bitcast.getResult()); + auto loadBit = LLVM::LoadOp::create(rewriter, loc, rewriter.getI1Type(), + bitcast.getResult()); // Replace all uses of the llvm.ptr with the i1, which includes // the return op. Do not replace its use in the bitcast. 
@@ -1255,7 +1268,7 @@ class CustomUnitaryOpRewrite Value wrapQubitInArray(Location &loc, ConversionPatternRewriter &rewriter, ModuleOp parentModule, Value v) const { auto context = rewriter.getContext(); - auto qirArrayTy = cudaq::opt::getArrayType(context); + auto qirArrayTy = cudaq::cg::getLLVMArrayType(context); auto ptrTy = cudaq::opt::factory::getPointerType(context); FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRArrayCreateArray, qirArrayTy, @@ -1264,21 +1277,22 @@ class CustomUnitaryOpRewrite cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRArrayGetElementPtr1d, ptrTy, {qirArrayTy, rewriter.getIntegerType(64)}, parentModule); - Value zero = rewriter.create(loc, 0, 64); - Value one = rewriter.create(loc, 1, 64); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + Value one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); // FIXME: 8 bytes is assumed to be the sizeof(char*) on the target machine. - Value eight = rewriter.create(loc, 8, 32); - if (v.getType() != cudaq::opt::getQubitType(context)) + Value eight = arith::ConstantIntOp::create(rewriter, loc, 8, 32); + if (v.getType() != cudaq::cg::getLLVMQubitType(context)) return v; - auto createCall = rewriter.create( - loc, qirArrayTy, symbolRef, ArrayRef{eight, one}); + auto createCall = LLVM::CallOp::create(rewriter, loc, qirArrayTy, symbolRef, + ArrayRef{eight, one}); auto result = createCall.getResult(); - auto call = rewriter.create(loc, ptrTy, getSymbolRef, - ArrayRef{result, zero}); - Value pointer = rewriter.create( - loc, cudaq::opt::factory::getPointerType(ptrTy), call.getResult()); - auto cast = rewriter.create(loc, ptrTy, v); - rewriter.create(loc, cast, pointer); + auto call = LLVM::CallOp::create(rewriter, loc, ptrTy, getSymbolRef, + ArrayRef{result, zero}); + Value pointer = LLVM::BitcastOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(ptrTy), + call.getResult()); + auto cast = 
LLVM::BitcastOp::create(rewriter, loc, ptrTy, v); + LLVM::StoreOp::create(rewriter, loc, cast, pointer); return result; } @@ -1294,8 +1308,8 @@ class CustomUnitaryOpRewrite if (numParameters) op.emitOpError("Parameterized custom operations not yet supported."); - auto arrType = cudaq::opt::getArrayType(context); - auto qirArrayTy = cudaq::opt::getArrayType(context); + auto arrType = cudaq::cg::getLLVMArrayType(context); + auto qirArrayTy = cudaq::cg::getLLVMArrayType(context); FlatSymbolRefAttr concatFunc = cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRArrayConcatArray, arrType, {arrType, arrType}, @@ -1306,8 +1320,8 @@ class CustomUnitaryOpRewrite adaptor.getTargets().front()); for (auto oper : adaptor.getTargets().drop_front(1)) { auto backArr = wrapQubitInArray(loc, rewriter, parentModule, oper); - auto glue = rewriter.create( - loc, qirArrayTy, concatFunc, ArrayRef{targetArr, backArr}); + auto glue = LLVM::CallOp::create(rewriter, loc, qirArrayTy, concatFunc, + ArrayRef{targetArr, backArr}); targetArr = glue.getResult(); } @@ -1316,25 +1330,24 @@ class CustomUnitaryOpRewrite Value controlArr; if (controls.empty()) { // make an empty array - Value zero = rewriter.create(loc, 0, 64); - Value zero32 = rewriter.create(loc, 8, 32); + Value zero = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + Value zero32 = arith::ConstantIntOp::create(rewriter, loc, 8, 32); FlatSymbolRefAttr symbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRArrayCreateArray, - cudaq::opt::getArrayType(context), + cudaq::cg::getLLVMArrayType(context), {rewriter.getI32Type(), rewriter.getI64Type()}, parentModule); - controlArr = rewriter - .create( - loc, TypeRange{cudaq::opt::getArrayType(context)}, - symbolRef, ValueRange{zero32, zero}) + controlArr = LLVM::CallOp::create( + rewriter, loc, cudaq::cg::getLLVMArrayType(context), + symbolRef, ArrayRef{zero32, zero}) .getResult(); } else { controlArr = wrapQubitInArray(loc, rewriter, parentModule, 
adaptor.getControls().front()); for (auto oper : adaptor.getControls().drop_front(1)) { auto backArr = wrapQubitInArray(loc, rewriter, parentModule, oper); - auto glue = rewriter.create( - loc, qirArrayTy, concatFunc, ArrayRef{controlArr, backArr}); + auto glue = LLVM::CallOp::create(rewriter, loc, qirArrayTy, concatFunc, + ArrayRef{controlArr, backArr}); controlArr = glue.getResult(); } } @@ -1369,22 +1382,21 @@ class CustomUnitaryOpRewrite // Shift back to the function rewriter.restoreInsertionPoint(insertPoint); // Get the string address and bit cast - auto opNameRef = rewriter.create( - loc, cudaq::opt::factory::getPointerType(opNameGlobal.getType()), + auto opNameRef = LLVM::AddressOfOp::create( + rewriter, loc, + cudaq::opt::factory::getPointerType(opNameGlobal.getType()), opNameGlobal.getSymName()); - auto castedOpNameRef = rewriter.create( - loc, cudaq::opt::factory::getPointerType(context), opNameRef); + auto castedOpNameRef = LLVM::BitcastOp::create( + rewriter, loc, cudaq::opt::factory::getPointerType(context), opNameRef); if (!globalOp) return op.emitOpError("global not found for custom op"); - auto complex64Ty = - typeConverter->convertType(ComplexType::get(rewriter.getF64Type())); - auto complex64PtrTy = LLVM::LLVMPointerType::get(complex64Ty); + auto complex64PtrTy = cudaq::opt::factory::getPointerType(context); Type type = typeConverter->convertType(globalOp.getType()); - auto addrOp = rewriter.create(loc, type, generatorName); + auto addrOp = LLVM::AddressOfOp::create(rewriter, loc, type, generatorName); auto unitaryData = - rewriter.create(loc, complex64PtrTy, addrOp); + LLVM::BitcastOp::create(rewriter, loc, complex64PtrTy, addrOp); StringRef qirFunctionName = op.isAdj() ? 
cudaq::opt::QIRCustomAdjOp : cudaq::opt::QIRCustomOp; @@ -1392,14 +1404,14 @@ class CustomUnitaryOpRewrite FlatSymbolRefAttr customSymbolRef = cudaq::opt::factory::createLLVMFunctionSymbol( qirFunctionName, LLVM::LLVMVoidType::get(context), - {complex64PtrTy, cudaq::opt::getArrayType(context), - cudaq::opt::getArrayType(context), - LLVM::LLVMPointerType::get(rewriter.getI8Type())}, + {complex64PtrTy, cudaq::cg::getLLVMArrayType(context), + cudaq::cg::getLLVMArrayType(context), + cudaq::opt::factory::getPointerType(context)}, parentModule); rewriter.replaceOpWithNewOp( op, TypeRange{}, customSymbolRef, - ValueRange{unitaryData, controlArr, targetArr, castedOpNameRef}); + ArrayRef{unitaryData, controlArr, targetArr, castedOpNameRef}); return success(); } diff --git a/lib/Optimizer/CodeGen/RemoveMeasurements.cpp b/lib/Optimizer/CodeGen/RemoveMeasurements.cpp index 056276f50e2..e3719db1efc 100644 --- a/lib/Optimizer/CodeGen/RemoveMeasurements.cpp +++ b/lib/Optimizer/CodeGen/RemoveMeasurements.cpp @@ -10,18 +10,17 @@ #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" -#define DEBUG_TYPE "qir-remove-measurements" - namespace cudaq::opt { #define GEN_PASS_DEF_REMOVEMEASUREMENTS #include "cudaq/Optimizer/CodeGen/Passes.h.inc" } // namespace cudaq::opt +#define DEBUG_TYPE "qir-remove-measurements" + using namespace mlir; namespace { @@ -32,9 +31,9 @@ class EraseMeasurements : public OpRewritePattern { LogicalResult matchAndRewrite(LLVM::CallOp call, PatternRewriter &rewriter) const override { if (auto callee = call.getCallee()) { - if (callee->equals(cudaq::opt::QIRMeasureBody) || - callee->equals(cudaq::opt::QIRRecordOutput) || - callee->equals(cudaq::opt::QIRArrayRecordOutput)) { + if (*callee == 
cudaq::opt::QIRMeasureBody || + *callee == cudaq::opt::QIRRecordOutput || + *callee == cudaq::opt::QIRArrayRecordOutput) { rewriter.eraseOp(call); return success(); } @@ -58,7 +57,7 @@ struct RemoveMeasurementsPass RewritePatternSet patterns(context); patterns.insert(context); LLVM_DEBUG(llvm::dbgs() << "Before measurement erasure:\n" << *op); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if (failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After measurement erasure:\n" << *op); } diff --git a/lib/Optimizer/CodeGen/ReturnToOutputLog.cpp b/lib/Optimizer/CodeGen/ReturnToOutputLog.cpp index 128ba8f64ef..790ff9e6ac8 100644 --- a/lib/Optimizer/CodeGen/ReturnToOutputLog.cpp +++ b/lib/Optimizer/CodeGen/ReturnToOutputLog.cpp @@ -12,19 +12,17 @@ #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" #include "llvm/ADT/TypeSwitch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" -#define DEBUG_TYPE "return-to-output-log" - namespace cudaq::opt { #define GEN_PASS_DEF_RETURNTOOUTPUTLOG #include "cudaq/Optimizer/CodeGen/Passes.h.inc" } // namespace cudaq::opt +#define DEBUG_TYPE "return-to-output-log" + using namespace mlir; namespace { @@ -58,9 +56,9 @@ class ReturnRewrite : public OpRewritePattern { labelStr = prefix->str(); Value label = makeLabel(loc, rewriter, labelStr); if (intTy.getWidth() == 1) { - rewriter.create(loc, TypeRange{}, - cudaq::opt::QIRBoolRecordOutput, - ArrayRef{val, label}); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRBoolRecordOutput, + ArrayRef{val, label}); return; } // Integer: convert to (signed) i64. 
The decoder *must* lop off any @@ -68,14 +66,15 @@ class ReturnRewrite : public OpRewritePattern { // bits by examining the real integer type. Value castVal = val; if (intTy.getWidth() < 64) - castVal = rewriter.create( - loc, rewriter.getI64Type(), val, cudaq::cc::CastOpMode::Signed); + castVal = + cudaq::cc::CastOp::create(rewriter, loc, rewriter.getI64Type(), + val, cudaq::cc::CastOpMode::Signed); else if (intTy.getWidth() > 64) - castVal = rewriter.create( - loc, rewriter.getI64Type(), val); - rewriter.create(loc, TypeRange{}, - cudaq::opt::QIRIntegerRecordOutput, - ArrayRef{castVal, label}); + castVal = cudaq::cc::CastOp::create(rewriter, loc, + rewriter.getI64Type(), val); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRIntegerRecordOutput, + ArrayRef{castVal, label}); }) .Case([&](FloatType floatTy) { int width = floatTy.getWidth(); @@ -86,11 +85,11 @@ class ReturnRewrite : public OpRewritePattern { // Floating point: convert it to double, whatever it actually is. 
Value castVal = val; if (floatTy != rewriter.getF64Type()) - castVal = rewriter.create( - loc, rewriter.getF64Type(), val); - rewriter.create(loc, TypeRange{}, - cudaq::opt::QIRDoubleRecordOutput, - ArrayRef{castVal, label}); + castVal = cudaq::cc::CastOp::create(rewriter, loc, + rewriter.getF64Type(), val); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRDoubleRecordOutput, + ArrayRef{castVal, label}); }) .Case([&](cudaq::cc::StructType structTy) { auto labelStr = translateType(structTy); @@ -98,15 +97,15 @@ class ReturnRewrite : public OpRewritePattern { labelStr = prefix->str(); Value label = makeLabel(loc, rewriter, labelStr); std::int32_t sz = structTy.getNumMembers(); - Value size = rewriter.create(loc, sz, 64); - rewriter.create(loc, TypeRange{}, - cudaq::opt::QIRTupleRecordOutput, - ArrayRef{size, label}); + Value size = arith::ConstantIntOp::create(rewriter, loc, sz, 64); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRTupleRecordOutput, + ArrayRef{size, label}); std::string preStr = prefix ? prefix->str() : std::string{}; for (std::int32_t i = 0; i < sz; ++i) { std::string offset = preStr + std::string(".") + std::to_string(i); - Value w = rewriter.create( - loc, structTy.getMember(i), val, + Value w = cudaq::cc::ExtractValueOp::create( + rewriter, loc, structTy.getMember(i), val, ArrayRef{i}); genOutputLog(loc, rewriter, w, offset, allowDynamic); } @@ -115,16 +114,16 @@ class ReturnRewrite : public OpRewritePattern { auto labelStr = translateType(arrTy); Value label = makeLabel(loc, rewriter, labelStr); std::int32_t sz = arrTy.getSize(); - Value size = rewriter.create(loc, sz, 64); - rewriter.create(loc, TypeRange{}, - cudaq::opt::QIRArrayRecordOutput, - ArrayRef{size, label}); + Value size = arith::ConstantIntOp::create(rewriter, loc, sz, 64); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRArrayRecordOutput, + ArrayRef{size, label}); std::string preStr = prefix ? 
prefix->str() : std::string{}; for (std::int32_t i = 0; i < sz; ++i) { std::string offset = preStr + std::string("[") + std::to_string(i) + std::string("]"); - Value w = rewriter.create( - loc, arrTy.getElementType(), val, + Value w = cudaq::cc::ExtractValueOp::create( + rewriter, loc, arrTy.getElementType(), val, ArrayRef{i}); genOutputLog(loc, rewriter, w, offset, allowDynamic); } @@ -138,24 +137,42 @@ class ReturnRewrite : public OpRewritePattern { std::int32_t sz = *maybeLen; auto labelStr = translateType(vecTy, sz); Value label = makeLabel(loc, rewriter, labelStr); - Value size = rewriter.create(loc, sz, 64); - rewriter.create(loc, TypeRange{}, - cudaq::opt::QIRArrayRecordOutput, - ArrayRef{size, label}); + Value size = arith::ConstantIntOp::create(rewriter, loc, sz, 64); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRArrayRecordOutput, + ArrayRef{size, label}); std::string preStr = prefix ? prefix->str() : std::string{}; Value rawBuffer = vecInit.getBuffer(); + if (auto callOp = rawBuffer.getDefiningOp()) { + if (callOp.getCallee() == "__nvqpp_vectorCopyCtor" && + callOp.getNumOperands() >= 1) { + rawBuffer = callOp.getOperand(0); + } else if (callOp.getCallee() == "malloc") { + for (auto *user : rawBuffer.getUsers()) { + auto memcpy = dyn_cast(user); + if (memcpy && + memcpy.getCallee().starts_with("llvm.memcpy") && + memcpy.getNumOperands() >= 2 && + memcpy.getOperand(0) == rawBuffer) { + rawBuffer = memcpy.getOperand(1); + break; + } + } + } + } auto eleTy = vecTy.getElementType(); auto buffTy = cudaq::cc::PointerType::get(eleTy); auto ptrArrTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); Value buffer = - rewriter.create(loc, ptrArrTy, rawBuffer); + cudaq::cc::CastOp::create(rewriter, loc, ptrArrTy, rawBuffer); for (std::int32_t i = 0; i < sz; ++i) { std::string offset = preStr + std::string("[") + std::to_string(i) + std::string("]"); - auto v = rewriter.create( - loc, buffTy, buffer, ArrayRef{i}); - Value w = 
rewriter.create(loc, v); + auto v = cudaq::cc::ComputePtrOp::create( + rewriter, loc, buffTy, buffer, + ArrayRef{i}); + Value w = cudaq::cc::LoadOp::create(rewriter, loc, v); genOutputLog(loc, rewriter, w, offset, allowDynamic); } return; @@ -165,46 +182,46 @@ class ReturnRewrite : public OpRewritePattern { return; auto eleTy = vecTy.getElementType(); auto i8PtrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); - Value size = rewriter.create( - loc, rewriter.getI64Type(), val); + Value size = cudaq::cc::StdvecSizeOp::create( + rewriter, loc, rewriter.getI64Type(), val); Value rawData = - rewriter.create(loc, i8PtrTy, val); + cudaq::cc::StdvecDataOp::create(rewriter, loc, i8PtrTy, val); if (auto intTy = dyn_cast(eleTy)) { if (eleTy == rewriter.getI1Type()) { - rewriter.create(loc, TypeRange{}, - cudaq::opt::QIRBoolSpanRecordOutput, - ArrayRef{rawData, size}); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRBoolSpanRecordOutput, + ArrayRef{rawData, size}); } else { std::int32_t byteSize = (intTy.getWidth() + 7) / 8; Value elemSize = - rewriter.create(loc, byteSize, 32); - rewriter.create( - loc, TypeRange{}, cudaq::opt::QIRIntSpanRecordOutput, - ArrayRef{rawData, size, elemSize}); + arith::ConstantIntOp::create(rewriter, loc, byteSize, 32); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRIntSpanRecordOutput, + ArrayRef{rawData, size, elemSize}); } } else if (isa(eleTy)) { auto floatTy = cast(eleTy); std::int32_t byteSize = floatTy.getWidth() / 8; Value elemSize = - rewriter.create(loc, byteSize, 32); - rewriter.create( - loc, TypeRange{}, cudaq::opt::QIRFloatSpanRecordOutput, - ArrayRef{rawData, size, elemSize}); + arith::ConstantIntOp::create(rewriter, loc, byteSize, 32); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QIRFloatSpanRecordOutput, + ArrayRef{rawData, size, elemSize}); } else { // Unsupported element type — trap. 
LLVM_DEBUG(llvm::dbgs() << "ReturnToOutputLog -- unsupported element type: " << eleTy << "\n"); - Value one = rewriter.create(loc, 1, 64); - rewriter.create(loc, TypeRange{}, cudaq::opt::QISTrap, - ValueRange{one}); + Value one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); + func::CallOp::create(rewriter, loc, TypeRange{}, + cudaq::opt::QISTrap, ValueRange{one}); } }) .Default([&](Type) { // If we reach here, we don't know how to handle this type. - Value one = rewriter.create(loc, 1, 64); - rewriter.create(loc, TypeRange{}, cudaq::opt::QISTrap, - ValueRange{one}); + Value one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); + func::CallOp::create(rewriter, loc, TypeRange{}, cudaq::opt::QISTrap, + ValueRange{one}); }); } @@ -232,9 +249,12 @@ class ReturnRewrite : public OpRewritePattern { return {std::string("array<") + translateType(arrTy.getElementType()) + std::string(" x ") + std::to_string(size) + std::string(">")}; } - if (auto arrTy = dyn_cast(ty)) + if (auto arrTy = dyn_cast(ty)) { + if (!vecSz) + return {"error"}; return {std::string("array<") + translateType(arrTy.getElementType()) + std::string(" x ") + std::to_string(*vecSz) + std::string(">")}; + } return {"error"}; } @@ -242,10 +262,10 @@ class ReturnRewrite : public OpRewritePattern { StringRef label) { auto strLitTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get( rewriter.getContext(), rewriter.getI8Type(), label.size() + 1)); - Value lit = rewriter.create( - loc, strLitTy, rewriter.getStringAttr(label)); + Value lit = cudaq::cc::CreateStringLiteralOp::create( + rewriter, loc, strLitTy, rewriter.getStringAttr(label)); auto i8PtrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); - return rewriter.create(loc, i8PtrTy, lit); + return cudaq::cc::CastOp::create(rewriter, loc, i8PtrTy, lit); } bool allowDynamic; @@ -287,7 +307,7 @@ struct ReturnToOutputLogPass RewritePatternSet patterns(ctx); patterns.insert(ctx, allowDynamicResult); LLVM_DEBUG(llvm::dbgs() << "Before return to 
output logging:\n" << module); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After return to output logging:\n" << module); } diff --git a/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp b/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp index 023ca43709a..37add457bbb 100644 --- a/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp +++ b/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp @@ -15,9 +15,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/FormatAdapters.h" -#include -#include -#include using namespace mlir; @@ -117,15 +114,15 @@ static LogicalResult emitOperation(nlohmann::json &json, // Propagate the name of this qubit into the operation output values. emitter.getOrAssignName( - optor->getResult(0), + optor.getControls()[0], emitter.getOrAssignName(optor.getControls()[0]).str()); - emitter.getOrAssignName(optor->getResult(1), + emitter.getOrAssignName(optor.getTarget(0), emitter.getOrAssignName(optor.getTarget(0)).str()); } else { json["name"] = "prx"; if (optor.getParameters().size() != 2) - optor.emitError("IQM prx gate expects exactly two parameters."); + optor.emitError("IQM phased_rx gate expects exactly two parameters."); auto parameter0 = cudaq::getParameterValueAsDouble(optor.getParameters()[0]); @@ -139,7 +136,7 @@ static LogicalResult emitOperation(nlohmann::json &json, json["args"]["phase_t"] = convertToFullTurns(*parameter1); // Propagate the name of this qubit into the operation output values. - emitter.getOrAssignName(optor->getResult(0), + emitter.getOrAssignName(optor.getTarget(0), emitter.getOrAssignName(optor.getTarget(0)).str()); } @@ -200,9 +197,9 @@ static LogicalResult emitOperation(nlohmann::json &json, .Case([](auto) { return success(); }) .Default([&](Operation *) -> LogicalResult { // Allow LLVM and cc dialect ops (for storing measure results). 
- if (op.getName().getDialectNamespace().equals("llvm") || - op.getName().getDialectNamespace().equals("cc") || - op.getName().getDialectNamespace().equals("arith")) + if (op.getName().getDialectNamespace() == "llvm" || + op.getName().getDialectNamespace() == "cc" || + op.getName().getDialectNamespace() == "arith") return success(); return op.emitOpError() << "unable to translate op to IQM Json " << op.getName().getIdentifier().str(); diff --git a/lib/Optimizer/CodeGen/TranslateToOpenQASM.cpp b/lib/Optimizer/CodeGen/TranslateToOpenQASM.cpp index 9b60034fa15..ed842e6a445 100644 --- a/lib/Optimizer/CodeGen/TranslateToOpenQASM.cpp +++ b/lib/Optimizer/CodeGen/TranslateToOpenQASM.cpp @@ -6,20 +6,17 @@ * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ +#include "PassDetails.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" #include "cudaq/Optimizer/Builder/RuntimeNames.h" #include "cudaq/Optimizer/CodeGen/Emitter.h" #include "cudaq/Optimizer/CodeGen/OpenQASMEmitter.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/TypeSwitch.h" #include "mlir/Analysis/CallGraph.h" using namespace mlir; -using namespace cudaq; //===----------------------------------------------------------------------===// // Helper functions @@ -52,13 +49,14 @@ static LogicalResult translateOperatorName(quake::OperatorInterface optor, return success(); } -static LogicalResult printParameters(Emitter &emitter, ValueRange parameters) { +static LogicalResult printParameters(cudaq::Emitter &emitter, + ValueRange parameters) { if (parameters.empty()) return success(); emitter.os << '('; auto isFailure = false; llvm::interleaveComma(parameters, emitter.os, [&](Value value) { - auto parameter = 
getParameterValueAsDouble(value); + auto parameter = cudaq::getParameterValueAsDouble(value); if (!parameter.has_value()) { isFailure = true; return; @@ -70,8 +68,8 @@ static LogicalResult printParameters(Emitter &emitter, ValueRange parameters) { return failure(isFailure); } -static StringRef printClassicalAllocation(Emitter &emitter, Value bitOrVector, - size_t size) { +static StringRef printClassicalAllocation(cudaq::Emitter &emitter, + Value bitOrVector, size_t size) { auto name = emitter.createName(); emitter.os << llvm::formatv("creg {0}[{1}];\n", name, size); if (size == 1) @@ -83,10 +81,11 @@ static StringRef printClassicalAllocation(Emitter &emitter, Value bitOrVector, // Emitters functions //===----------------------------------------------------------------------===// -static LogicalResult emitOperation(Emitter &emitter, Operation &op); +static LogicalResult emitOperation(cudaq::Emitter &emitter, Operation &op); -static LogicalResult emitEntryPoint(Emitter &emitter, func::FuncOp kernel) { - Emitter::Scope scope(emitter, /*isEntryPoint=*/true); +static LogicalResult emitEntryPoint(cudaq::Emitter &emitter, + func::FuncOp kernel) { + cudaq::Emitter::Scope scope(emitter, /*isEntryPoint=*/true); for (Operation &op : kernel.getOps()) { if (failed(emitOperation(emitter, op))) return failure(); @@ -94,7 +93,7 @@ static LogicalResult emitEntryPoint(Emitter &emitter, func::FuncOp kernel) { return success(); } -static LogicalResult emitOperation(Emitter &emitter, ModuleOp moduleOp) { +static LogicalResult emitOperation(cudaq::Emitter &emitter, ModuleOp moduleOp) { func::FuncOp entryPoint = nullptr; emitter.os << "// Code generated by NVIDIA's nvq++ compiler\n"; emitter.os << "OPENQASM 2.0;\n\n"; @@ -149,7 +148,8 @@ static LogicalResult emitOperation(Emitter &emitter, ModuleOp moduleOp) { return emitEntryPoint(emitter, entryPoint); } -static LogicalResult emitOperation(Emitter &emitter, quake::AllocaOp allocaOp) { +static LogicalResult emitOperation(cudaq::Emitter 
&emitter, + quake::AllocaOp allocaOp) { Value refOrVeq = allocaOp.getRefOrVec(); auto name = emitter.createName(); auto size = 1; @@ -165,7 +165,7 @@ static LogicalResult emitOperation(Emitter &emitter, quake::AllocaOp allocaOp) { return success(); } -static LogicalResult emitOperation(Emitter &emitter, quake::ApplyOp op) { +static LogicalResult emitOperation(cudaq::Emitter &emitter, quake::ApplyOp op) { // In Quake's reference semantics form, kernels only return classical types. // Thus, we check whether the numbers of results is zero or not. if (op.getNumResults() > 0) @@ -203,7 +203,7 @@ static inline StringRef formatFunctionName(StringRef quakeName) { return quakeName.drop_while([](char C) { return C == '_'; }); } -static LogicalResult emitOperation(Emitter &emitter, func::FuncOp op) { +static LogicalResult emitOperation(cudaq::Emitter &emitter, func::FuncOp op) { if (op.isPrivate()) return success(); @@ -214,7 +214,7 @@ static LogicalResult emitOperation(Emitter &emitter, func::FuncOp op) { // empty `__qpu__` helper), which have the prefix and are kept so that any // call sites remain valid. if (!op.isExternal() && op.front().without_terminator().empty() && - !op.getName().starts_with(runtime::cudaqGenPrefixName)) + !op.getName().starts_with(cudaq::runtime::cudaqGenPrefixName)) return success(); // In Quake's reference semantics form, kernels only return classical types. 
@@ -232,7 +232,7 @@ static LogicalResult emitOperation(Emitter &emitter, func::FuncOp op) { parameters.push_back(arg); } - Emitter::Scope scope(emitter); + cudaq::Emitter::Scope scope(emitter); emitter.os << "gate " << formatFunctionName(op.getName()); if (!parameters.empty()) { emitter.os << '('; @@ -260,12 +260,13 @@ static LogicalResult emitOperation(Emitter &emitter, func::FuncOp op) { return success(); } -static LogicalResult emitOperation(Emitter &emitter, quake::ExtractRefOp op) { +static LogicalResult emitOperation(cudaq::Emitter &emitter, + quake::ExtractRefOp op) { std::optional index = std::nullopt; if (op.hasConstantIndex()) index = op.getConstantIndex(); else - index = getIndexValueAsInt(op.getIndex()); + index = cudaq::getIndexValueAsInt(op.getIndex()); auto veqName = emitter.getOrAssignName(op.getVeq()); auto qrefName = llvm::formatv("{0}[{1}]", veqName, *index); @@ -273,7 +274,8 @@ static LogicalResult emitOperation(Emitter &emitter, quake::ExtractRefOp op) { return success(); } -static LogicalResult emitOperation(Emitter &emitter, func::CallOp callOp) { +static LogicalResult emitOperation(cudaq::Emitter &emitter, + func::CallOp callOp) { StringRef funcName = formatFunctionName(callOp.getCallee()); emitter.os << funcName; emitter.os << ' '; @@ -284,7 +286,7 @@ static LogicalResult emitOperation(Emitter &emitter, func::CallOp callOp) { return success(); } -static LogicalResult emitOperation(Emitter &emitter, +static LogicalResult emitOperation(cudaq::Emitter &emitter, quake::OperatorInterface optor) { // Handle adjoint for T and S StringRef name = ""; @@ -318,7 +320,7 @@ static LogicalResult emitOperation(Emitter &emitter, return success(); } -static LogicalResult emitOperation(Emitter &emitter, quake::MzOp op) { +static LogicalResult emitOperation(cudaq::Emitter &emitter, quake::MzOp op) { if (op.getTargets().size() > 1) return op.emitError( "cannot translate measurements with more than one target"); @@ -335,28 +337,29 @@ static LogicalResult 
emitOperation(Emitter &emitter, quake::MzOp op) { return success(); } -static LogicalResult emitOperation(Emitter &emitter, quake::ResetOp op) { +static LogicalResult emitOperation(cudaq::Emitter &emitter, quake::ResetOp op) { emitter.os << "reset " << emitter.getOrAssignName(op.getTargets()) << ";"; return success(); } -static LogicalResult emitOperation(Emitter &emitter, Operation &op) { - using namespace quake; +static LogicalResult emitOperation(cudaq::Emitter &emitter, Operation &op) { return llvm::TypeSwitch(&op) // MLIR .Case([&](auto op) { return emitOperation(emitter, op); }) .Case([&](auto op) { return emitOperation(emitter, op); }) .Case([&](auto op) { return emitOperation(emitter, op); }) // Quake - .Case([&](auto op) { return emitOperation(emitter, op); }) - .Case([&](auto op) { return emitOperation(emitter, op); }) - .Case([&](auto op) { return emitOperation(emitter, op); }) - .Case( + .Case([&](auto op) { return emitOperation(emitter, op); }) + .Case( + [&](auto op) { return emitOperation(emitter, op); }) + .Case( + [&](auto op) { return emitOperation(emitter, op); }) + .Case( [&](auto optor) { return emitOperation(emitter, optor); }) - .Case([&](auto op) { return emitOperation(emitter, op); }) - .Case([&](auto op) { return emitOperation(emitter, op); }) + .Case([&](auto op) { return emitOperation(emitter, op); }) + .Case([&](auto op) { return emitOperation(emitter, op); }) // Ignore - .Case([&](auto op) { return success(); }) + .Case([&](auto op) { return success(); }) .Case([&](auto op) { return success(); }) .Case([&](auto op) { return success(); }) .Case([&](auto op) { return success(); }) @@ -365,13 +368,13 @@ static LogicalResult emitOperation(Emitter &emitter, Operation &op) { .Case([&](auto op) { return success(); }) .Case([&](auto op) { return success(); }) .Default([&](Operation *) -> LogicalResult { - if (op.getName().getDialectNamespace().equals("llvm")) + if (op.getName().getDialectNamespace() == "llvm") return success(); return 
op.emitOpError("unable to translate op to OpenQASM 2.0"); }); } LogicalResult cudaq::translateToOpenQASM(Operation *op, raw_ostream &os) { - Emitter emitter(os); + cudaq::Emitter emitter(os); return emitOperation(emitter, *op); } diff --git a/lib/Optimizer/CodeGen/WireSetsToProfileQIR.cpp b/lib/Optimizer/CodeGen/WireSetsToProfileQIR.cpp index 8f7370a9947..88a9318a920 100644 --- a/lib/Optimizer/CodeGen/WireSetsToProfileQIR.cpp +++ b/lib/Optimizer/CodeGen/WireSetsToProfileQIR.cpp @@ -15,13 +15,9 @@ #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h" #include "cudaq/Optimizer/CodeGen/QuakeToExecMgr.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "nlohmann/json.hpp" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassOptions.h" #include "mlir/Transforms/DialectConversion.h" @@ -65,10 +61,10 @@ struct QuakeTypeConverter : public TypeConverter { QuakeTypeConverter() { addConversion([](Type ty) { return ty; }); addConversion([](quake::WireType ty) { - return cudaq::opt::getQubitType(ty.getContext()); + return cudaq::cg::getQubitType(ty.getContext()); }); addConversion([](quake::MeasureType ty) { - return cudaq::opt::getResultType(ty.getContext()); + return cudaq::cg::getResultType(ty.getContext()); }); } }; @@ -124,8 +120,8 @@ struct GeneralRewrite : OpConversionPattern { if (funcName.ends_with(qis_ctl_suffix) && adaptor.getControls().size() == 1 && adaptor.getTargets().size() == 1) { auto *ctx = rewriter.getContext(); - auto qbTy = cudaq::opt::getQubitType(ctx); - auto arrTy = cudaq::opt::getArrayType(ctx); + auto qbTy = cudaq::cg::getQubitType(ctx); + auto arrTy = cudaq::cg::getArrayType(ctx); SmallVector argTys = {arrTy, qbTy}; ModuleOp mod = qop->template 
getParentOfType(); FlatSymbolRefAttr qisFuncSymbol; @@ -133,20 +129,20 @@ struct GeneralRewrite : OpConversionPattern { auto fTy = f.getFunctionType(); auto fSym = f.getSymNameAttr(); qisFuncSymbol = FlatSymbolRefAttr::get(ctx, funcName); - Value fVal = rewriter.create(loc, fTy, fSym); + Value fVal = func::ConstantOp::create(rewriter, loc, fTy, fSym); auto ptrI8Ty = cudaq::cc::PointerType::get(rewriter.getI8Type()); Value fPtrVal = - rewriter.create(loc, ptrI8Ty, fVal); - Value one = rewriter.create(loc, 1, 64); + cudaq::cc::FuncToPtrOp::create(rewriter, loc, ptrI8Ty, fVal); + Value one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); SmallVector callParamVals{one, fPtrVal, *adaptor.getControls().begin(), *adaptor.getTargets().begin()}; SmallVector qubits(adaptor.getControls().begin(), adaptor.getControls().end()); qubits.append(adaptor.getTargets().begin(), adaptor.getTargets().end()); - rewriter.create(loc, std::nullopt, - cudaq::opt::NVQIRInvokeWithControlBits, - callParamVals); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::NVQIRInvokeWithControlBits, + callParamVals); rewriter.replaceOp(qop, qubits); return success(); } @@ -155,8 +151,8 @@ struct GeneralRewrite : OpConversionPattern { SmallVector qubits(adaptor.getControls().begin(), adaptor.getControls().end()); qubits.append(adaptor.getTargets().begin(), adaptor.getTargets().end()); - rewriter.create(loc, std::nullopt, funcName, - adaptor.getOperands()); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, funcName, + adaptor.getOperands()); rewriter.replaceOp(qop, qubits); return success(); } @@ -173,12 +169,12 @@ struct BorrowWireRewrite : OpConversionPattern { ConversionPatternRewriter &rewriter) const override { auto id = borrowWire.getIdentity(); auto loc = borrowWire.getLoc(); - Value idCon = rewriter.create(loc, id, 64); + Value idCon = arith::ConstantIntOp::create(rewriter, loc, id, 64); auto imTy = cudaq::cc::PointerType::get(NoneType::get(rewriter.getContext())); - 
idCon = rewriter.create(loc, imTy, idCon); + idCon = cudaq::cc::CastOp::create(rewriter, loc, imTy, idCon); rewriter.replaceOpWithNewOp( - borrowWire, cudaq::opt::getQubitType(rewriter.getContext()), idCon); + borrowWire, cudaq::cg::getQubitType(rewriter.getContext()), idCon); return success(); } }; @@ -192,8 +188,8 @@ struct ResetRewrite : OpConversionPattern { SmallVector qubits{adaptor.getTargets()}; auto loc = reset.getLoc(); std::string funcName = toQisBodyName(std::string("reset")); - rewriter.create(loc, std::nullopt, funcName, - adaptor.getOperands()); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, funcName, + adaptor.getOperands()); rewriter.replaceOp(reset, qubits); return success(); } @@ -205,8 +201,8 @@ struct BranchRewrite : OpConversionPattern { LogicalResult matchAndRewrite(cf::BranchOp branchOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto qubitTy = cudaq::opt::getQubitType(rewriter.getContext()); - rewriter.startRootUpdate(branchOp); + auto qubitTy = cudaq::cg::getQubitType(rewriter.getContext()); + rewriter.startOpModification(branchOp); if (branchOp.getSuccessor()) for (auto arg : branchOp.getSuccessor()->getArguments()) if (isa(arg.getType())) @@ -214,7 +210,7 @@ struct BranchRewrite : OpConversionPattern { for (auto operand : branchOp.getOperands()) if (isa(operand.getType())) operand.setType(qubitTy); - rewriter.finalizeRootUpdate(branchOp); + rewriter.finalizeOpModification(branchOp); return success(); } }; @@ -225,8 +221,8 @@ struct CondBranchRewrite : OpConversionPattern { LogicalResult matchAndRewrite(cf::CondBranchOp branchOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto qubitTy = cudaq::opt::getQubitType(rewriter.getContext()); - rewriter.startRootUpdate(branchOp); + auto qubitTy = cudaq::cg::getQubitType(rewriter.getContext()); + rewriter.startOpModification(branchOp); for (auto suc : branchOp.getSuccessors()) for (auto arg : suc->getArguments()) if 
(isa(arg.getType())) @@ -234,7 +230,7 @@ struct CondBranchRewrite : OpConversionPattern { for (auto operand : branchOp.getOperands()) if (isa(operand.getType())) operand.setType(qubitTy); - rewriter.finalizeRootUpdate(branchOp); + rewriter.finalizeOpModification(branchOp); return success(); } }; @@ -283,15 +279,15 @@ struct MzRewrite : OpConversionPattern { // FIXME: Must use sequentially assigned result ids std::string funcName = toQisBodyName(std::string("mz")); auto loc = meas.getLoc(); - Value idCon = rewriter.create(loc, resultCount++, 64); + Value idCon = + arith::ConstantIntOp::create(rewriter, loc, resultCount++, 64); auto imTy = cudaq::cc::PointerType::get(NoneType::get(rewriter.getContext())); - idCon = rewriter.create(loc, imTy, idCon); - Value resultVal = rewriter.create( - loc, cudaq::opt::getResultType(rewriter.getContext()), idCon); - rewriter.create( - loc, std::nullopt, funcName, - ValueRange{adaptor.getTargets()[0], resultVal}); + idCon = cudaq::cc::CastOp::create(rewriter, loc, imTy, idCon); + Value resultVal = cudaq::cc::CastOp::create( + rewriter, loc, cudaq::cg::getResultType(rewriter.getContext()), idCon); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, funcName, + ValueRange{adaptor.getTargets()[0], resultVal}); rewriter.replaceOp(meas, ValueRange{resultVal, adaptor.getTargets()[0]}); auto regName = meas.getRegisterName(); @@ -306,15 +302,15 @@ struct MzRewrite : OpConversionPattern { auto arrI8Ty = mlir::LLVM::LLVMArrayType::get(rewriter.getI8Type(), regName->size() + 1); auto ptrArrTy = cudaq::cc::PointerType::get(arrI8Ty); - Value nameVal = rewriter.create( - loc, ptrArrTy, nameObj.getName()); + Value nameVal = cudaq::cc::AddressOfOp::create(rewriter, loc, ptrArrTy, + nameObj.getName()); auto cstrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); Value nameValCStr = - rewriter.create(loc, cstrTy, nameVal); + cudaq::cc::CastOp::create(rewriter, loc, cstrTy, nameVal); - rewriter.create(loc, std::nullopt, - 
cudaq::opt::QIRRecordOutput, - ValueRange{resultVal, nameValCStr}); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::QIRRecordOutput, + ValueRange{resultVal, nameValCStr}); } // Populate resultQubitVals[] @@ -361,15 +357,15 @@ struct DiscriminateRewrite : OpConversionPattern { auto arrI8Ty = mlir::LLVM::LLVMArrayType::get(rewriter.getI8Type(), iter->second.size() + 1); auto ptrArrTy = cudaq::cc::PointerType::get(arrI8Ty); - Value nameVal = rewriter.create(loc, ptrArrTy, - nameObj.getName()); + Value nameVal = cudaq::cc::AddressOfOp::create(rewriter, loc, ptrArrTy, + nameObj.getName()); auto cstrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); Value nameValCStr = - rewriter.create(loc, cstrTy, nameVal); + cudaq::cc::CastOp::create(rewriter, loc, cstrTy, nameVal); - rewriter.create( - loc, std::nullopt, cudaq::opt::QIRRecordOutput, - ValueRange{adaptor.getMeasurement(), nameValCStr}); + func::CallOp::create(rewriter, loc, mlir::TypeRange{}, + cudaq::opt::QIRRecordOutput, + ValueRange{adaptor.getMeasurement(), nameValCStr}); if (isAdaptiveProfile) { std::string funcName = toQisBodyName(std::string("read_result")); rewriter.replaceOpWithNewOp( @@ -377,7 +373,7 @@ struct DiscriminateRewrite : OpConversionPattern { ValueRange{adaptor.getMeasurement()}); } else { Value undef = - rewriter.create(loc, rewriter.getI1Type()); + cudaq::cc::UndefOp::create(rewriter, loc, rewriter.getI1Type()); rewriter.replaceOp(disc, undef); } return success(); @@ -477,7 +473,7 @@ struct WireSetToProfileQIRPrepPass auto loc = builder.getUnknownLoc(); auto createNewDecl = [&](const std::string &name, FunctionType ty) { - auto func = builder.create(loc, name, ty); + auto func = func::FuncOp::create(builder, loc, name, ty); func.setPrivate(); }; auto addNewDecl = [&](std::string &&suffix, FunctionType ty) { @@ -497,7 +493,7 @@ struct WireSetToProfileQIRPrepPass LLVM_DEBUG(llvm::dbgs() << "Module before prep:\n"; op.dump()); // Insert declarations for all the functions 
we *may* be using. - auto qbTy = cudaq::opt::getQubitType(ctx); + auto qbTy = cudaq::cg::getQubitType(ctx); auto targ1Ty = FunctionType::get(ctx, TypeRange{qbTy}, TypeRange{}); auto targ1CtrlTy = FunctionType::get(ctx, TypeRange{qbTy, qbTy}, TypeRange{}); @@ -539,7 +535,7 @@ struct WireSetToProfileQIRPrepPass addDecls("swap", targ2Ty, targ2CtrlTy); addBodyDecl("cnot", targ2Ty); - auto resTy = cudaq::opt::getResultType(ctx); + auto resTy = cudaq::cg::getResultType(ctx); auto measTy = FunctionType::get(ctx, TypeRange{qbTy, resTy}, TypeRange{}); addBodyDecl("mz", measTy); auto readResTy = FunctionType::get(ctx, TypeRange{resTy}, @@ -608,9 +604,8 @@ struct WireSetToProfileQIRPostPass callableRegion->getParentOfType(); if (auto reqQubits = - parentFuncOp - ->getAttr(cudaq::opt::qir0_1::RequiredQubitsAttrName) - .dyn_cast_or_null()) { + dyn_cast_if_present(parentFuncOp->getAttr( + cudaq::opt::qir0_1::RequiredQubitsAttrName))) { std::uint32_t thisFuncReqQubits = 0; if (!reqQubits.strref().getAsInteger(10, thisFuncReqQubits)) { auto thisFuncHighestIdentity = thisFuncReqQubits - 1; @@ -622,9 +617,8 @@ struct WireSetToProfileQIRPostPass } if (auto reqResults = - parentFuncOp - ->getAttr(cudaq::opt::qir0_1::RequiredResultsAttrName) - .dyn_cast_or_null()) { + dyn_cast_if_present(parentFuncOp->getAttr( + cudaq::opt::qir0_1::RequiredResultsAttrName))) { std::uint32_t thisFuncReqResults = 0; if (!reqResults.strref().getAsInteger(10, thisFuncReqResults)) { auto thisFuncHighestResult = thisFuncReqResults - 1; diff --git a/lib/Optimizer/Dialect/CC/CCOps.cpp b/lib/Optimizer/Dialect/CC/CCOps.cpp index f6def3c59e0..36dc7517212 100644 --- a/lib/Optimizer/Dialect/CC/CCOps.cpp +++ b/lib/Optimizer/Dialect/CC/CCOps.cpp @@ -50,7 +50,7 @@ std::optional cudaq::opt::factory::getDoubleIfConstant(Value value) { Value cudaq::cc::getByteSizeOfType(OpBuilder &builder, Location loc, Type ty, bool useSizeOf) { auto createInt = [&](std::int32_t byteWidth) -> Value { - return builder.create(loc, 
byteWidth, 64); + return arith::ConstantIntOp::create(builder, loc, byteWidth, 64); }; // Handle primitive types with constant sizes. @@ -91,8 +91,8 @@ Value cudaq::cc::getByteSizeOfType(OpBuilder &builder, Location loc, Type ty, return createInt(byteWidth); } if (useSizeOf) - return builder.create(loc, builder.getI64Type(), - strTy); + return cudaq::cc::SizeOfOp::create(builder, loc, builder.getI64Type(), + strTy); return {}; }) .Case([&](cudaq::cc::ArrayType arrTy) -> Value { @@ -103,8 +103,8 @@ Value cudaq::cc::getByteSizeOfType(OpBuilder &builder, Location loc, Type ty, if (!v) return {}; auto scale = createInt(arrTy.getSize()); - return builder.create(loc, builder.getI64Type(), v, - scale); + return arith::MulIOp::create(builder, loc, builder.getI64Type(), v, + scale); }) .Case([&](cudaq::cc::SpanLikeType) -> Value { // Uniformly on the device size: {ptr, i64} @@ -179,7 +179,7 @@ struct FuseAllocLength : public OpRewritePattern { Type oldTy = alloca.getElementType(); auto arrTy = cudaq::cc::ArrayType::get(context, oldTy, *size); Type origTy = alloca.getType(); - auto newAlloc = rewriter.create(loc, arrTy); + auto newAlloc = cudaq::cc::AllocaOp::create(rewriter, loc, arrTy); rewriter.replaceOpWithNewOp(alloca, origTy, newAlloc); return success(); @@ -206,19 +206,40 @@ LogicalResult cudaq::cc::AllocaOp::verify() { // CastOp //===----------------------------------------------------------------------===// -OpFoldResult cudaq::cc::CastOp::fold(FoldAdaptor adaptor) { - // If cast is a nop, just forward the argument to the uses. - if (getType() == getValue().getType()) - return getValue(); - if (auto optConst = adaptor.getValue()) { +namespace { +/// This pattern folds casts of (some) constants into new constant ops. This is +/// meant to eliminate cast operations when result values are clearly +/// computable. 
+struct FoldCastOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(cudaq::cc::CastOp kast, + PatternRewriter &rewriter) const override { + // If cast is a nop, just forward the argument to the uses. + auto ty = kast.getType(); + if (ty == kast.getValue().getType()) { + Value val = kast.getValue(); + rewriter.replaceOp(kast, val); + return success(); + } + + Operation *defOp = kast.getValue().getDefiningOp(); + if (!defOp) + return failure(); + + Attribute optConst; + if (!matchPattern(kast.getValue(), m_Constant(&optConst))) + return failure(); + // Replace a constant + cast with a new constant of an updated type. - auto ty = getType(); - OpBuilder builder(*this); - auto fltTy = builder.getF32Type(); - auto dblTy = builder.getF64Type(); - auto loc = getLoc(); + auto fltTy = rewriter.getF32Type(); + auto dblTy = rewriter.getF64Type(); + auto loc = kast.getLoc(); + auto truncate = [&](std::int64_t val) -> std::int64_t { - auto srcTy = getValue().getType(); + auto srcTy = kast.getValue().getType(); + if (!srcTy.isIntOrFloat()) + return val; auto srcWidth = srcTy.getIntOrFloatBitWidth(); // Zero-extend to get the original integer value. if (srcWidth < 64) @@ -231,42 +252,51 @@ OpFoldResult cudaq::cc::CastOp::fold(FoldAdaptor adaptor) { if (isa(ty)) { auto width = ty.getIntOrFloatBitWidth(); - if (getZint()) + if (kast.getZint()) val = truncate(val); if (width == 1) { + // C++ semantics. 0 is false. All other values are true. 
bool v = val != 0; - return builder.create(loc, v, width) - .getResult(); + auto c = arith::ConstantIntOp::create(rewriter, loc, v, width); + rewriter.replaceOp(kast, c); + return success(); } - return builder.create(loc, val, width) - .getResult(); - - } else if (ty == fltTy) { - if (getZint()) { + auto c = arith::ConstantIntOp::create(rewriter, loc, val, width); + rewriter.replaceOp(kast, c); + return success(); + } + if (ty == fltTy) { + if (kast.getZint()) { val = truncate(val); APFloat fval(static_cast(static_cast(val))); - return builder.create(loc, fval, fltTy) - .getResult(); + auto c = arith::ConstantFloatOp::create(rewriter, loc, fltTy, fval); + rewriter.replaceOp(kast, c); + return success(); } - if (getSint()) { + if (kast.getSint()) { APFloat fval(static_cast(val)); - return builder.create(loc, fval, fltTy) - .getResult(); + auto c = arith::ConstantFloatOp::create(rewriter, loc, fltTy, fval); + rewriter.replaceOp(kast, c); + return success(); } - } else if (ty == dblTy) { - if (getZint()) { + } + if (ty == dblTy) { + if (kast.getZint()) { val = truncate(val); APFloat fval(static_cast(static_cast(val))); - return builder.create(loc, fval, dblTy) - .getResult(); + auto c = arith::ConstantFloatOp::create(rewriter, loc, dblTy, fval); + rewriter.replaceOp(kast, c); + return success(); } - if (getSint()) { + if (kast.getSint()) { APFloat fval(static_cast(val)); - return builder.create(loc, fval, dblTy) - .getResult(); + auto c = arith::ConstantFloatOp::create(rewriter, loc, dblTy, fval); + rewriter.replaceOp(kast, c); + return success(); } } + return failure(); } // %5 = arith.constant ... 
: F1 @@ -278,27 +308,32 @@ OpFoldResult cudaq::cc::CastOp::fold(FoldAdaptor adaptor) { if (ty == fltTy) { float f = val.convertToDouble(); APFloat fval(f); - return builder.create(loc, fval, fltTy) - .getResult(); + auto c = arith::ConstantFloatOp::create(rewriter, loc, fltTy, fval); + rewriter.replaceOp(kast, c); + return success(); } if (ty == dblTy) { APFloat fval{val.convertToDouble()}; - return builder.create(loc, fval, dblTy) - .getResult(); + auto c = arith::ConstantFloatOp::create(rewriter, loc, dblTy, fval); + rewriter.replaceOp(kast, c); + return success(); } if (isa(ty)) { auto width = ty.getIntOrFloatBitWidth(); - if (getZint()) { + if (kast.getZint()) { std::uint64_t v = val.convertToDouble(); - return builder.create(loc, v, width) - .getResult(); + auto c = arith::ConstantIntOp::create(rewriter, loc, v, width); + rewriter.replaceOp(kast, c); + return success(); } - if (getSint()) { + if (kast.getSint()) { std::int64_t v = val.convertToDouble(); - return builder.create(loc, v, width) - .getResult(); + auto c = arith::ConstantIntOp::create(rewriter, loc, v, width); + rewriter.replaceOp(kast, c); + return success(); } } + return failure(); } // %5 = complex.constant ... : complex @@ -306,6 +341,8 @@ OpFoldResult cudaq::cc::CastOp::fold(FoldAdaptor adaptor) { // ──────────────────────────────────────────── // %6 = complex.constant ... 
: complex if (auto attr = dyn_cast(optConst)) { + if (!isa(ty)) + return failure(); auto eleTy = cast(ty).getElementType(); auto reFp = dyn_cast(attr[0]); auto imFp = dyn_cast(attr[1]); @@ -313,24 +350,35 @@ OpFoldResult cudaq::cc::CastOp::fold(FoldAdaptor adaptor) { if (eleTy == fltTy) { float reVal = reFp.getValue().convertToDouble(); float imVal = imFp.getValue().convertToDouble(); - auto rePart = builder.getFloatAttr(eleTy, APFloat{reVal}); - auto imPart = builder.getFloatAttr(eleTy, APFloat{imVal}); - auto cv = builder.getArrayAttr({rePart, imPart}); - return builder.create(loc, ty, cv).getResult(); + auto rePart = rewriter.getFloatAttr(eleTy, APFloat{reVal}); + auto imPart = rewriter.getFloatAttr(eleTy, APFloat{imVal}); + auto cv = rewriter.getArrayAttr({rePart, imPart}); + auto c = + complex::ConstantOp::create(rewriter, loc, ty, cv).getResult(); + rewriter.replaceOp(kast, c); + return success(); } if (eleTy == dblTy) { double reVal = reFp.getValue().convertToDouble(); double imVal = imFp.getValue().convertToDouble(); - auto rePart = builder.getFloatAttr(eleTy, APFloat{reVal}); - auto imPart = builder.getFloatAttr(eleTy, APFloat{imVal}); - auto cv = builder.getArrayAttr({rePart, imPart}); - return builder.create(loc, ty, cv).getResult(); + auto rePart = rewriter.getFloatAttr(eleTy, APFloat{reVal}); + auto imPart = rewriter.getFloatAttr(eleTy, APFloat{imVal}); + auto cv = rewriter.getArrayAttr({rePart, imPart}); + auto c = + complex::ConstantOp::create(rewriter, loc, ty, cv).getResult(); + rewriter.replaceOp(kast, c); + return success(); } } + // Might be a complex integer? Ignore for now. + return failure(); } + + // this is not a constant we try to fold. 
+ return failure(); } - return nullptr; -} +}; +} // namespace LogicalResult cudaq::cc::CastOp::verify() { auto inTy = getValue().getType(); @@ -536,7 +584,7 @@ struct FuseComplexRe : public OpRewritePattern { if (comcon) { FloatType fltTy = reop.getType(); APFloat reVal = cast(comcon.getValue()[0]).getValue(); - rewriter.replaceOpWithNewOp(reop, reVal, fltTy); + rewriter.replaceOpWithNewOp(reop, fltTy, reVal); return success(); } return failure(); @@ -551,7 +599,7 @@ struct FuseComplexIm : public OpRewritePattern { if (comcon) { FloatType fltTy = imop.getType(); APFloat imVal = cast(comcon.getValue()[1]).getValue(); - rewriter.replaceOpWithNewOp(imop, imVal, fltTy); + rewriter.replaceOpWithNewOp(imop, fltTy, imVal); return success(); } return failure(); @@ -567,7 +615,7 @@ getArbitraryCustomCanonicalizationPatterns(RewritePatternSet &patterns, void cudaq::cc::CastOp::getCanonicalizationPatterns(RewritePatternSet &patterns, MLIRContext *context) { - patterns.add(context); + patterns.add(context); getArbitraryCustomCanonicalizationPatterns(patterns, context); } @@ -613,7 +661,7 @@ void printInterleavedIndices(OpAsmPrinter &printer, B computePtrOp, if (Value val = dyn_cast(cst)) printer.printOperand(val); else - printer << cst.get().getInt(); + printer << cast(cst).getInt(); }); } @@ -700,7 +748,8 @@ void destructureIndices(Type currType, ArrayRef indices, dynamicIndices.push_back(val); } else { rawConstantIndices.push_back( - iter.template get()); + iter.template dyn_cast< + cudaq::cc::InterleavedArgumentConstantIndex>()); } currType = @@ -737,51 +786,66 @@ void cudaq::cc::ComputePtrOp::build(OpBuilder &builder, OperationState &result, result.addOperands(dynamicIndices); } -OpFoldResult cudaq::cc::ComputePtrOp::fold(FoldAdaptor adaptor) { - if (getDynamicIndices().empty()) - return nullptr; - // Params is a list of possible substitutions (Attributes) the length of the - // SSA arguments. Skip the first one, which is the base pointer argument. 
- auto paramIter = adaptor.getOperands().begin(); - ++paramIter; - - auto dynamicIndexIter = getDynamicIndices().begin(); - SmallVector newConstantIndices; - SmallVector newIndices; - bool changed = false; - - // Build lists of raw constants and SSA values with the SSA values that have - // substituions omitted and properly interleaved in as constants in the first - // list. - for (auto index : getRawConstantIndices()) { - if (index != kDynamicIndex) { - newConstantIndices.push_back(index); - continue; - } - if (auto newVal = dyn_cast_if_present(*paramIter)) { - newConstantIndices.push_back(newVal.getInt()); - changed = true; - } else { - newConstantIndices.push_back(kDynamicIndex); - newIndices.push_back(*dynamicIndexIter); - } - ++dynamicIndexIter; +namespace { +struct FoldComputePtrOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(cudaq::cc::ComputePtrOp ptrOp, + PatternRewriter &rewriter) const override { + if (ptrOp.getDynamicIndices().empty()) + return failure(); + + // Params is a list of possible substitutions (Attributes) the length of the + // SSA arguments. Skip the first one, which is the base pointer argument. + auto paramIter = ptrOp.getOperands().begin(); ++paramIter; - } - // If any new constants were found, update the cc.compute_ptr in place, adding - // the new constants and dropping any unneeded SSA arguments on the floor. 
- if (changed) { - assert(newConstantIndices.size() == getRawConstantIndices().size()); - assert(newIndices.size() < getDynamicIndices().size()); - getDynamicIndicesMutable().assign(newIndices); - setRawConstantIndices(newConstantIndices); - return Value{*this}; + auto dynamicIndexIter = ptrOp.getDynamicIndices().begin(); + SmallVector newConstantIndices; + SmallVector newIndices; + bool changed = false; + + // Build lists of raw constants and SSA values with the SSA values that have + // substituions omitted and properly interleaved in as constants in the + // first list. + for (auto index : ptrOp.getRawConstantIndices()) { + if (index != cudaq::cc::ComputePtrOp::kDynamicIndex) { + newConstantIndices.push_back(index); + continue; + } + + Attribute konstant; + bool handleNonConstant = true; + if (matchPattern(*paramIter, m_Constant(&konstant))) + if (auto newVal = dyn_cast_if_present(konstant)) { + newConstantIndices.push_back(newVal.getInt()); + changed = true; + handleNonConstant = false; + } + if (handleNonConstant) { + newConstantIndices.push_back(cudaq::cc::ComputePtrOp::kDynamicIndex); + newIndices.push_back(*dynamicIndexIter); + } + ++dynamicIndexIter; + ++paramIter; + } + + // If any new constants were found, update the cc.compute_ptr in place, + // adding the new constants and dropping any unneeded SSA arguments on the + // floor. + if (!changed) + return failure(); + + assert(newConstantIndices.size() == ptrOp.getRawConstantIndices().size()); + assert(newIndices.size() < ptrOp.getDynamicIndices().size()); + rewriter.modifyOpInPlace(ptrOp, [&]() { + ptrOp.getDynamicIndicesMutable().assign(newIndices); + ptrOp.setRawConstantIndices(newConstantIndices); + }); + return success(); } - return nullptr; -} +}; -namespace { /// If two (or more) `cc.compute_ptr` are chained then they can be fused into a /// single `cc.compute_ptr`. 
struct FuseAddressArithmetic @@ -876,8 +940,8 @@ struct FuseAddressArithmetic auto eleTy = cast(ptrTy.getElementType()); auto subTy = eleTy.getElementType(); auto simpleTy = cudaq::cc::PointerType::get(subTy); - auto simple = rewriter.create( - ptrOp.getLoc(), simpleTy, ptrOp.getBase()); + auto simple = cudaq::cc::CastOp::create(rewriter, ptrOp.getLoc(), + simpleTy, ptrOp.getBase()); // Collect indices. auto iter = ptrOp.getDynamicIndices().begin(); @@ -901,7 +965,7 @@ struct FuseAddressArithmetic void cudaq::cc::ComputePtrOp::getCanonicalizationPatterns( RewritePatternSet &patterns, MLIRContext *context) { - patterns.add(context); + patterns.add(context); } std::optional @@ -969,50 +1033,66 @@ LogicalResult cudaq::cc::ExtractValueOp::verify() { return success(); } -OpFoldResult cudaq::cc::ExtractValueOp::fold(FoldAdaptor adaptor) { - if (indicesAreConstant()) - return nullptr; +namespace { +struct FoldExtractOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(cudaq::cc::ExtractValueOp extval, + PatternRewriter &rewriter) const override { + if (extval.indicesAreConstant()) + return failure(); - // Params is a list of possible substitutions (Attributes) the length of the - // SSA arguments. Skip the first one, which is the base pointer argument. - auto paramIter = adaptor.getOperands().begin(); - ++paramIter; + // Params is a list of possible substitutions (Attributes) the length of the + // SSA arguments. Skip the first one, which is the base pointer argument. 
+ auto paramIter = extval.getOperands().begin(); + ++paramIter; - auto dynamicIndexIter = getDynamicIndices().begin(); - SmallVector newConstantIndices; - SmallVector newIndices; - bool changed = false; + auto dynamicIndexIter = extval.getDynamicIndices().begin(); + SmallVector newConstantIndices; + SmallVector newIndices; + bool changed = false; + + // Build lists of raw constants and SSA values with the SSA values that have + // substituions omitted and properly interleaved in as constants in the + // first list. + for (auto index : extval.getRawConstantIndices()) { + if (index != cudaq::cc::ExtractValueOp::kDynamicIndex) { + newConstantIndices.push_back(index); + continue; + } - // Build lists of raw constants and SSA values with the SSA values that have - // substituions omitted and properly interleaved in as constants in the first - // list. - for (auto index : getRawConstantIndices()) { - if (index != kDynamicIndex) { - newConstantIndices.push_back(index); - continue; - } - if (auto newVal = dyn_cast_if_present(*paramIter)) { - newConstantIndices.push_back(newVal.getInt()); - changed = true; - } else { - newConstantIndices.push_back(kDynamicIndex); - newIndices.push_back(*dynamicIndexIter); + Attribute konstant; + bool handleNonConstant = true; + if (matchPattern(*paramIter, m_Constant(&konstant))) + if (auto newVal = dyn_cast_if_present(konstant)) { + newConstantIndices.push_back(newVal.getInt()); + changed = true; + handleNonConstant = false; + } + if (handleNonConstant) { + newConstantIndices.push_back(cudaq::cc::ExtractValueOp::kDynamicIndex); + newIndices.push_back(*dynamicIndexIter); + } + ++dynamicIndexIter; + ++paramIter; } - ++dynamicIndexIter; - ++paramIter; - } - // If any new constants were found, update the cc.compute_ptr in place, adding - // the new constants and dropping any unneeded SSA arguments on the floor. 
- if (changed) { - assert(newConstantIndices.size() == getRawConstantIndices().size()); - assert(newIndices.size() < getDynamicIndices().size()); - getDynamicIndicesMutable().assign(newIndices); - setRawConstantIndices(newConstantIndices); - return Value{*this}; + // If any new constants were found, update the cc.compute_ptr in place, + // adding the new constants and dropping any unneeded SSA arguments on the + // floor. + if (!changed) + return failure(); + + assert(newConstantIndices.size() == extval.getRawConstantIndices().size()); + assert(newIndices.size() < extval.getDynamicIndices().size()); + rewriter.modifyOpInPlace(extval, [&]() { + extval.getDynamicIndicesMutable().assign(newIndices); + extval.setRawConstantIndices(newConstantIndices); + }); + return success(); } - return nullptr; -} +}; +} // namespace static ParseResult parseExtractValueIndices( OpAsmParser &parser, @@ -1083,16 +1163,16 @@ struct FuseWithConstantArray if (auto intTy = dyn_cast(extval.getType())) { std::int32_t i = extval.getRawConstantIndices()[0]; auto cval = cast(conarr.getConstantValues()[i]).getInt(); - rewriter.replaceOpWithNewOp(extval, cval, - intTy); + rewriter.replaceOpWithNewOp(extval, intTy, + cval); return success(); } if (auto fltTy = dyn_cast(extval.getType())) { std::int32_t i = extval.getRawConstantIndices()[0]; auto cval = cast(conarr.getConstantValues()[i]).getValue(); - rewriter.replaceOpWithNewOp(extval, cval, - fltTy); + rewriter.replaceOpWithNewOp(extval, fltTy, + cval); return success(); } @@ -1111,7 +1191,7 @@ struct FuseWithConstantArray void cudaq::cc::ExtractValueOp::getCanonicalizationPatterns( RewritePatternSet &patterns, MLIRContext *context) { - patterns.add(context); + patterns.add(context); } //===----------------------------------------------------------------------===// @@ -1368,8 +1448,8 @@ struct ForwardStdvecInitSize if (auto arrTy = dyn_cast(init.getBuffer().getType())) if (!arrTy.isUnknownSize()) { - rewriter.replaceOpWithNewOp( - size, 
arrTy.getSize(), ty); + rewriter.replaceOpWithNewOp(size, ty, + arrTy.getSize()); return success(); } } @@ -1387,9 +1467,6 @@ void cudaq::cc::StdvecSizeOp::getCanonicalizationPatterns( // LoopOp //===----------------------------------------------------------------------===// -// Override the default. -Region &cudaq::cc::LoopOp::getLoopBody() { return getBodyRegion(); } - // The basic block of the step region must end in a continue op, which need not // be pretty printed if the loop has no block arguments. This ensures the step // block is properly terminated. @@ -1401,7 +1478,7 @@ static void ensureStepTerminator(OpBuilder &builder, OperationState &result, auto addContinue = [&]() { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(block); - builder.create(result.location); + cudaq::cc::ContinueOp::create(builder, result.location); }; if (block->empty()) { addContinue(); @@ -1629,69 +1706,82 @@ bool cudaq::cc::LoopOp::hasBreakInBody() { } void cudaq::cc::LoopOp::getSuccessorRegions( - std::optional index, ArrayRef operands, - SmallVectorImpl ®ions) { - if (!index) { + RegionBranchPoint point, SmallVectorImpl ®ions) { + if (point.isParent()) { // loop op, successor is either the WHILE region, or the DO region if loop // is post conditional. if (isPostConditional()) - regions.push_back( - RegionSuccessor(&getBodyRegion(), getDoEntryArguments())); + regions.emplace_back(&getBodyRegion(), getDoEntryArguments()); else - regions.push_back( - RegionSuccessor(&getWhileRegion(), getWhileArguments())); + regions.emplace_back(&getWhileRegion(), getWhileArguments()); return; } - switch (index.value()) { - case 0: - // WHILE region, successors are the DO region and either the owning loop op - // (if no else region is present) or the else region. 
- regions.push_back(RegionSuccessor(&getBodyRegion(), getDoEntryArguments())); + + Operation *pred = point.getTerminatorPredecessorOrNull(); + assert(pred && "must have a terminator"); + Region *region = pred->getParentRegion(); + assert(region && "must have a region"); + if (region == &getWhileRegion()) { + // WHILE region, successors are the owning loop op and the DO region. + regions.emplace_back(&getBodyRegion(), getDoEntryArguments()); if (hasPythonElse()) - regions.push_back( - RegionSuccessor(&getElseRegion(), getElseEntryArguments())); + regions.emplace_back(&getElseRegion(), getElseEntryArguments()); else - regions.push_back(RegionSuccessor(getResults())); - break; - case 1: + regions.emplace_back(getOperation(), getResults()); + } else if (region == &getBodyRegion()) { // DO region, successor is STEP region (2) if present, or WHILE region (0) // if STEP is absent. if (hasStep()) - regions.push_back(RegionSuccessor(&getStepRegion(), getStepArguments())); + regions.emplace_back(&getStepRegion(), getStepArguments()); else - regions.push_back( - RegionSuccessor(&getWhileRegion(), getWhileArguments())); + regions.emplace_back(&getWhileRegion(), getWhileArguments()); // If the body contains a break, then the loop op is also a successor. if (hasBreakInBody()) - regions.push_back(RegionSuccessor(getResults())); - break; - case 2: + regions.emplace_back(getOperation(), getResults()); + } else if (region == &getStepRegion()) { // STEP region, if present, WHILE region is always successor. if (hasStep()) - regions.push_back( - RegionSuccessor(&getWhileRegion(), getWhileArguments())); - break; - case 3: + regions.emplace_back(&getWhileRegion(), getWhileArguments()); + } else if (region == &getElseRegion()) { // ELSE region, successors are the owning loop op. 
if (hasPythonElse()) - regions.push_back(RegionSuccessor(getResults())); - break; + regions.emplace_back(getOperation(), getResults()); + } else { + emitOpError("unhandled region"); } } OperandRange -cudaq::cc::LoopOp::getSuccessorEntryOperands(std::optional index) { - assert(index && "invalid index region"); - switch (*index) { - case 0: - if (!isPostConditional()) - return getInitialArgs(); - break; - case 1: - if (isPostConditional()) - return getInitialArgs(); - break; - } +cudaq::cc::LoopOp::getEntrySuccessorOperands(RegionSuccessor successor) { + // If the successor is the 'while' region (Region #0), pass the initial args. + if (successor.getSuccessor() == &getWhileRegion()) + return getInitialArgs(); + + auto *region = successor.getSuccessor(); + if (region == &getWhileRegion() && !isPostConditional()) + return getInitialArgs(); + if (region == &getBodyRegion() && isPostConditional()) + return getInitialArgs(); + + // Otherwise, no operands are passed from the parent. + return {nullptr, 0}; +} + +SmallVector cudaq::cc::LoopOp::getLoopRegions() { + return {&getWhileRegion(), &getBodyRegion(), &getStepRegion()}; +} + +OperandRange +cudaq::cc::LoopOp::getEntrySuccessorOperands(RegionBranchPoint point) { + llvm::errs() << "getEntrySuccessorOperands: " << point << "\n"; + assert(!point.isParent() && "invalid index region"); + Operation *pred = point.getTerminatorPredecessorOrNull(); + assert(pred && "must have a terminator"); + Region *region = pred->getParentRegion(); + if (region == &getWhileRegion() && !isPostConditional()) + return getInitialArgs(); + if (region == &getBodyRegion() && isPostConditional()) + return getInitialArgs(); return {nullptr, 0}; } @@ -1845,7 +1935,7 @@ static void ensureScopeRegionTerminator(OpBuilder &builder, } OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(block); - builder.create(result.location); + cudaq::cc::ContinueOp::create(builder, result.location); } ParseResult cudaq::cc::ScopeOp::parse(OpAsmParser 
&parser, @@ -1865,13 +1955,12 @@ void cudaq::cc::ScopeOp::getRegionInvocationBounds( ArrayRef attrs, SmallVectorImpl &bounds) {} void cudaq::cc::ScopeOp::getSuccessorRegions( - std::optional index, ArrayRef operands, - SmallVectorImpl ®ions) { - if (!index) { - regions.push_back(RegionSuccessor(&getRegion())); + RegionBranchPoint point, SmallVectorImpl ®ions) { + if (point.isParent()) { + regions.emplace_back(&getRegion()); return; } - regions.push_back(RegionSuccessor(getResults())); + regions.emplace_back(getOperation(), getResults()); } // If quantumAllocs, then just look for any allocate memory effect. Otherwise, @@ -1942,7 +2031,7 @@ struct EraseScopeWhenNotNeeded : public OpRewritePattern { succBlock = rewriter.createBlock( splitBlock, scope.getResultTypes(), SmallVector(scope.getNumResults(), loc)); - rewriter.create(loc, splitBlock); + cf::BranchOp::create(rewriter, loc, splitBlock); } // Inline the cc.scope's region into the parent and create a branch to the // new successor block. @@ -1951,13 +2040,13 @@ struct EraseScopeWhenNotNeeded : public OpRewritePattern { auto *initTerminator = initRegion.back().getTerminator(); auto initTerminatorOperands = initTerminator->getOperands(); rewriter.setInsertionPointToEnd(&initRegion.back()); - rewriter.create(loc, succBlock, initTerminatorOperands); + cf::BranchOp::create(rewriter, loc, succBlock, initTerminatorOperands); rewriter.eraseOp(initTerminator); rewriter.inlineRegionBefore(initRegion, succBlock); // Replace the cc.scope with a branch to the newly inlined region's entry // block. 
rewriter.setInsertionPointToEnd(scopeBlock); - rewriter.create(loc, initBlock, ValueRange{}); + cf::BranchOp::create(rewriter, loc, initBlock, ValueRange{}); rewriter.replaceOp(scope, succBlock->getArguments()); return success(); } @@ -2045,7 +2134,7 @@ static void ensureIfRegionTerminator(OpBuilder &builder, OperationState &result, } OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToEnd(block); - builder.create(result.location); + cudaq::cc::ContinueOp::create(builder, result.location); } ParseResult cudaq::cc::IfOp::parse(OpAsmParser &parser, @@ -2113,16 +2202,31 @@ void cudaq::cc::IfOp::getRegionInvocationBounds( } void cudaq::cc::IfOp::getSuccessorRegions( - std::optional index, ArrayRef operands, - SmallVectorImpl ®ions) { - if (index) { - regions.push_back(RegionSuccessor(getResults())); + RegionBranchPoint point, SmallVectorImpl ®ions) { + if (point.isParent()) { + regions.emplace_back(&getThenRegion()); + if (!getElseRegion().empty()) + regions.emplace_back(&getElseRegion()); + } else { + regions.emplace_back(getOperation(), getResults()); + } +} + +void cudaq::cc::IfOp::getEntrySuccessorRegions( + ArrayRef operands, SmallVectorImpl ®ions) { + FoldAdaptor adaptor(operands); + auto boolAttr = dyn_cast_or_null(adaptor.getCondition()); + if (!boolAttr) + return; + if (boolAttr.getValue()) { + regions.emplace_back(&getThenRegion()); + return; + } + if (!getElseRegion().empty()) { + regions.emplace_back(&getElseRegion()); return; } - // TODO: can constant fold if the condition is a constant here. 
- regions.push_back(RegionSuccessor(&getThenRegion())); - if (!getElseRegion().empty()) - regions.push_back(RegionSuccessor(&getElseRegion())); + regions.emplace_back(getOperation(), getResults()); } template @@ -2136,7 +2240,7 @@ LogicalResult cudaq::cc::verifyConvergentLinearTypesInRegions(Operation *op) { if (!regionOp) return failure(); SmallVector successors; - regionOp.getSuccessorRegions(std::nullopt, {}, successors); + regionOp.getSuccessorRegions(RegionBranchPoint::parent(), successors); // For each region successor, determine the number of distinct linear-typed // definitions in the region. long linearMax = -1; @@ -2168,18 +2272,78 @@ struct KillRegionIfConstant : public OpRewritePattern { // This rewrite will determine if the condition is constant. If it is, then it // will elide the true or false region completely, depending on the constant's - // value. + // value. For cc.if ops with results, it inlines the surviving region and + // replaces the results with the cc.continue operands. LogicalResult matchAndRewrite(cudaq::cc::IfOp ifOp, PatternRewriter &rewriter) const override { auto cond = ifOp.getCondition(); - if (!ifOp.getResults().empty()) - return failure(); auto con = cond.getDefiningOp(); if (!con) return failure(); auto val = con.value(); auto loc = ifOp.getLoc(); - auto truth = rewriter.create(loc, 1, 1); + + // Handle cc.if with results by inlining the surviving region. + if (!ifOp.getResults().empty()) { + Region *survivingRegion = nullptr; + if (val) { + // Condition is true: use then region. + survivingRegion = &ifOp.getThenRegion(); + } else { + // Condition is false: use else region if it exists. + if (ifOp.getElseRegion().empty()) { + // No else region and condition is false - this shouldn't happen for + // a well-formed cc.if with results, but handle it gracefully. + return failure(); + } + survivingRegion = &ifOp.getElseRegion(); + } + + // The surviving region should have a single block ending in cc.continue. 
+ if (survivingRegion->empty()) + return failure(); + + // Collect results from all cc.continue ops and inline the region. + // For a proper cc.if with results, there should be exactly one path + // through each region ending in cc.continue. + SmallVector results; + Block &entryBlock = survivingRegion->front(); + + // Find the terminator cc.continue to get the result values. + // We need to walk all blocks because there might be nested control flow. + for (Block &block : *survivingRegion) { + if (auto contOp = + dyn_cast(block.getTerminator())) { + // For single-block regions, just grab the operands. + if (survivingRegion->hasOneBlock()) { + results = llvm::to_vector(contOp.getOperands()); + rewriter.eraseOp(contOp); + break; + } + } + } + + // If we couldn't find a simple single-block case, fall back to creating + // a new cc.if with only the surviving region. + if (results.empty() || results.size() != ifOp.getNumResults()) { + auto truth = arith::ConstantIntOp::create(rewriter, loc, 1, 1); + rewriter.replaceOpWithNewOp( + ifOp, ifOp.getResultTypes(), truth, + [&](OpBuilder &, Location, Region ®ion) { + region.takeBody(*survivingRegion); + }); + return success(); + } + + // Inline the surviving region's block before the cc.if, replacing + // block arguments with the cc.if's linear args. + rewriter.inlineBlockBefore(&entryBlock, ifOp, ifOp.getLinearArgs()); + rewriter.replaceOp(ifOp, results); + return success(); + } + + // Original logic for cc.if without results. + auto truth = arith::ConstantIntOp::create(rewriter, loc, 1, 1); Region *newRegion = nullptr; if (val) { // The else block, if any, is dead. 
@@ -2194,7 +2358,7 @@ struct KillRegionIfConstant : public OpRewritePattern { OpBuilder::InsertionGuard guard(rewriter); Block *block = new Block(); rewriter.setInsertionPointToEnd(block); - rewriter.create(loc); + cudaq::cc::ContinueOp::create(rewriter, loc); newRegion->push_back(block); } } @@ -2381,8 +2545,8 @@ LogicalResult cudaq::cc::ConditionOp::verify() { return success(); } -MutableOperandRange cudaq::cc::ConditionOp::getMutableSuccessorOperands( - std::optional index) { +MutableOperandRange +cudaq::cc::ConditionOp::getMutableSuccessorOperands(RegionSuccessor point) { return getResultsMutable(); } @@ -2522,8 +2686,8 @@ struct FoldTrivialOffsetOf : public OpRewritePattern { PatternRewriter &rewriter) const override { // If there are no offsets, the offset is 0. if (offOp.getConstantIndices().empty()) { - rewriter.replaceOpWithNewOp(offOp, 0, - offOp.getType()); + rewriter.replaceOpWithNewOp(offOp, offOp.getType(), + 0); return success(); } @@ -2531,8 +2695,8 @@ struct FoldTrivialOffsetOf : public OpRewritePattern { if (std::all_of(offOp.getConstantIndices().begin(), offOp.getConstantIndices().end(), [](std::int32_t i) { return i == 0; })) { - rewriter.replaceOpWithNewOp(offOp, 0, - offOp.getType()); + rewriter.replaceOpWithNewOp(offOp, offOp.getType(), + 0); return success(); } @@ -2600,8 +2764,8 @@ struct ConstArrayConvertToKnownSize std::size_t size = connie.getConstantValuesAttr().size(); auto *ctx = rewriter.getContext(); auto newTy = cudaq::cc::ArrayType::get(ctx, arrTy.getElementType(), size); - auto ca = rewriter.create( - connie.getLoc(), newTy, connie.getConstantValuesAttr()); + auto ca = cudaq::cc::ConstantArrayOp::create( + rewriter, connie.getLoc(), newTy, connie.getConstantValuesAttr()); rewriter.replaceOpWithNewOp(connie, arrTy, ca); return success(); } @@ -2689,10 +2853,10 @@ struct ReplaceConstantSizes : public OpRewritePattern { auto sizeOpSz = sizeOp.getType().getIntOrFloatBitWidth(); auto loc = sizeOp.getLoc(); if (sizeOpSz < vSz) - v = 
rewriter.create(loc, sizeOp.getType(), v); + v = cudaq::cc::CastOp::create(rewriter, loc, sizeOp.getType(), v); else - v = rewriter.create( - loc, sizeOp.getType(), v, cudaq::cc::CastOpMode::Unsigned); + v = cudaq::cc::CastOp::create(rewriter, loc, sizeOp.getType(), v, + cudaq::cc::CastOpMode::Unsigned); } rewriter.replaceOp(sizeOp, v); return success(); diff --git a/lib/Optimizer/Dialect/CC/CCTypes.cpp b/lib/Optimizer/Dialect/CC/CCTypes.cpp index 75be57ad612..77fed739128 100644 --- a/lib/Optimizer/Dialect/CC/CCTypes.cpp +++ b/lib/Optimizer/Dialect/CC/CCTypes.cpp @@ -85,25 +85,17 @@ void cc::StructType::print(AsmPrinter &printer) const { printer << '>'; } -unsigned +llvm::TypeSize cc::StructType::getTypeSizeInBits(const DataLayout &dataLayout, DataLayoutEntryListRef params) const { - return static_cast(getBitSize()); + return llvm::TypeSize::getFixed(getBitSize()); } -unsigned cc::StructType::getABIAlignment(const DataLayout &dataLayout, +uint64_t cc::StructType::getABIAlignment(const DataLayout &dataLayout, DataLayoutEntryListRef params) const { return getAlignment(); } -unsigned -cc::StructType::getPreferredAlignment(const DataLayout &dataLayout, - DataLayoutEntryListRef params) const { - // No distinction between ABI and preferred alignments for now. Clang just - // gives us an alignment value. 
- return getAlignment(); -} - LogicalResult cc::StructType::verify(llvm::function_ref emitError, mlir::StringAttr, llvm::ArrayRef members, diff --git a/lib/Optimizer/Dialect/CC/CMakeLists.txt b/lib/Optimizer/Dialect/CC/CMakeLists.txt index ee725ba8913..6cd7b3c9f69 100644 --- a/lib/Optimizer/Dialect/CC/CMakeLists.txt +++ b/lib/Optimizer/Dialect/CC/CMakeLists.txt @@ -16,8 +16,9 @@ add_cudaq_dialect_library(CCDialect CCOpsIncGen CCTypesIncGen - LINK_LIBS + LINK_LIBS PUBLIC MLIRComplexDialect + MLIRControlFlowDialect MLIRFuncDialect MLIRLLVMDialect MLIRIR diff --git a/lib/Optimizer/Dialect/Quake/CanonicalPatterns.inc b/lib/Optimizer/Dialect/Quake/CanonicalPatterns.inc index a2d45bc0d21..75eafaf8f73 100644 --- a/lib/Optimizer/Dialect/Quake/CanonicalPatterns.inc +++ b/lib/Optimizer/Dialect/Quake/CanonicalPatterns.inc @@ -1,5 +1,5 @@ /****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. 
* * * * This source code and the accompanying materials are made available under * @@ -24,8 +24,8 @@ struct AdjustAdjointExpPauliPattern : OpRewritePattern { return failure(); SmallVector negp; if (!pauli.getParameters().empty()) - negp.push_back(rewriter.create(pauli.getLoc(), - pauli.getParameters()[0])); + negp.push_back(arith::NegFOp::create(rewriter, pauli.getLoc(), + pauli.getParameters()[0])); rewriter.replaceOpWithNewOp( pauli, pauli.getResultTypes(), UnitAttr{}, negp, pauli.getControls(), pauli.getTargets(), pauli.getNegatedQubitControlsAttr(), @@ -94,8 +94,8 @@ struct ForwardConstantVeqSizePattern if (!veqTy.hasSpecifiedSize()) return failure(); auto resTy = veqSize.getType(); - rewriter.replaceOpWithNewOp(veqSize, veqTy.getSize(), - resTy); + rewriter.replaceOpWithNewOp(veqSize, resTy, + veqTy.getSize()); return success(); } }; @@ -122,8 +122,8 @@ struct FuseConstantToAllocaPattern : public OpRewritePattern { return failure(); auto loc = alloc.getLoc(); auto resTy = alloc.getType(); - auto newAlloc = rewriter.create( - loc, static_cast(*intCon)); + auto newAlloc = quake::AllocaOp::create(rewriter, loc, + static_cast(*intCon)); rewriter.replaceOpWithNewOp(alloc, resTy, newAlloc); return success(); } @@ -216,8 +216,8 @@ static Value createCast(PatternRewriter &rewriter, Location loc, Value inVal) { auto i64Ty = rewriter.getI64Type(); assert(inVal.getType() != rewriter.getIndexType() && "use of index type is deprecated"); - return rewriter.create(loc, i64Ty, inVal, - cudaq::cc::CastOpMode::Unsigned); + return cudaq::cc::CastOp::create(rewriter, loc, i64Ty, inVal, + cudaq::cc::CastOpMode::Unsigned); } class ExtractRefFromSubVeqPattern @@ -251,18 +251,18 @@ public: auto loc = extract.getLoc(); auto low = [&]() -> Value { if (subveq.hasConstantLowerBound()) - return rewriter.create( - loc, subveq.getConstantLowerBound(), 64); + return arith::ConstantIntOp::create(rewriter, loc, + subveq.getConstantLowerBound(), 64); return subveq.getLower(); }(); if 
(extract.hasConstantIndex()) { - Value cv = rewriter.create( - loc, extract.getConstantIndex(), low.getType()); - offset = rewriter.create(loc, cv, low); + Value cv = arith::ConstantIntOp::create(rewriter, loc, low.getType(), + extract.getConstantIndex()); + offset = arith::AddIOp::create(rewriter, loc, cv, low); } else { auto cast1 = createCast(rewriter, loc, extract.getIndex()); auto cast2 = createCast(rewriter, loc, low); - offset = rewriter.create(loc, cast1, cast2); + offset = arith::AddIOp::create(rewriter, loc, cast1, cast2); } rewriter.replaceOpWithNewOp(extract, subveq.getVeq(), offset); @@ -338,7 +338,8 @@ struct ConcatSizePattern : public OpRewritePattern { if (*arity) { // Get each member for IR legalization. for (auto [i, memTy] : llvm::enumerate(stqTy.getMembers())) { - auto mem = rewriter.create(loc, memTy, opnd, i); + auto mem = + quake::GetMemberOp::create(rewriter, loc, memTy, opnd, i); targets.push_back(mem); } } @@ -354,7 +355,7 @@ struct ConcatSizePattern : public OpRewritePattern { // Leans into the relax_size canonicalization pattern. 
auto newTy = quake::VeqType::get(ctx, sum); - Value newOp = rewriter.create(loc, newTy, targets); + Value newOp = quake::ConcatOp::create(rewriter, loc, newTy, targets); auto noSizeTy = quake::VeqType::getUnsized(ctx); rewriter.replaceOpWithNewOp(concat, noSizeTy, newOp); return success(); @@ -449,8 +450,9 @@ struct ForwardAllocaTypePattern auto targ = initState.getTargets(); if (auto targTy = dyn_cast(targ.getType())) if (targTy.hasSpecifiedSize()) { - auto newInit = rewriter.create( - initState.getLoc(), targTy, targ, initState.getState()); + auto newInit = quake::InitializeStateOp::create( + rewriter, initState.getLoc(), targTy, targ, + initState.getState()); rewriter.replaceOpWithNewOp(initState, isTy, newInit); return success(); @@ -493,9 +495,9 @@ struct FixUnspecifiedSubveqPattern : public OpRewritePattern { subveq.getConstantUpperBound() - subveq.getConstantLowerBound() + 1u; auto szVecTy = quake::VeqType::get(ctx, size); auto loc = subveq.getLoc(); - auto subv = rewriter.create( - loc, szVecTy, subveq.getVeq(), subveq.getLower(), subveq.getUpper(), - subveq.getRawLower(), subveq.getRawUpper()); + auto subv = quake::SubVeqOp::create( + rewriter, loc, szVecTy, subveq.getVeq(), subveq.getLower(), + subveq.getUpper(), subveq.getRawLower(), subveq.getRawUpper()); rewriter.replaceOpWithNewOp(subveq, veqTy, subv); return success(); } @@ -606,8 +608,8 @@ public: // Lambda to create a Value for the lower bound of `s`. auto lofunc = [&](quake::SubVeqOp s) -> Value { if (s.hasConstantLowerBound()) - return rewriter.create( - loc, s.getConstantLowerBound(), 64); + return arith::ConstantIntOp::create(rewriter, loc, + s.getConstantLowerBound(), 64); return s.getLower(); }; auto priorlo = lofunc(prior); @@ -616,15 +618,15 @@ public: // Lambda for creating the upper bound Value. 
auto svup = [&]() -> Value { if (subveq.hasConstantUpperBound()) - return rewriter.create( - loc, subveq.getConstantUpperBound(), 64); + return arith::ConstantIntOp::create(rewriter, loc, + subveq.getConstantUpperBound(), 64); return subveq.getUpper(); }(); auto cast1 = createCast(rewriter, loc, priorlo); auto cast2 = createCast(rewriter, loc, svlo); auto cast3 = createCast(rewriter, loc, svup); - Value sum1 = rewriter.create(loc, cast1, cast2); - Value sum2 = rewriter.create(loc, cast1, cast3); + Value sum1 = arith::AddIOp::create(rewriter, loc, cast1, cast2); + Value sum2 = arith::AddIOp::create(rewriter, loc, cast1, cast3); auto veqTy = subveq.getType(); rewriter.replaceOpWithNewOp(subveq, veqTy, prior.getVeq(), sum1, sum2); @@ -648,8 +650,8 @@ struct FoldInitStateSizePattern : public OpRewritePattern { dyn_cast(initState.getTargets().getType())) if (veqTy.hasSpecifiedSize()) { std::size_t numQubits = veqTy.getSize(); - rewriter.replaceOpWithNewOp(veqSize, numQubits, - veqSize.getType()); + rewriter.replaceOpWithNewOp( + veqSize, veqSize.getType(), numQubits); return success(); } return failure(); @@ -700,12 +702,12 @@ struct MergeRotationPattern : public OpRewritePattern { auto adjAttr = rotate.getIsAdjAttr(); auto newAngle = [&]() -> Value { if (input.isAdj() == rotate.isAdj()) - return rewriter.create(loc, angle1, angle2); + return arith::AddFOp::create(rewriter, loc, angle1, angle2); // One is adjoint, so it should be subtracted from the other. 
if (input.isAdj()) - return rewriter.create(loc, angle2, angle1); + return arith::SubFOp::create(rewriter, loc, angle2, angle1); adjAttr = input.getIsAdjAttr(); - return rewriter.create(loc, angle1, angle2); + return arith::SubFOp::create(rewriter, loc, angle1, angle2); }(); rewriter.replaceOpWithNewOp(rotate, rotate.getResultTypes(), adjAttr, ValueRange{newAngle}, ValueRange{}, @@ -726,7 +728,7 @@ struct ForwardRelaxedSizePattern : public OpRewritePattern { PatternRewriter &rewriter) const override { auto inpVec = relax.getInputVec(); bool replaced = false; - rewriter.replaceOpWithIf(relax, inpVec, [&](OpOperand &use) { + rewriter.replaceUsesWithIf(relax, inpVec, [&](OpOperand &use) { bool res = false; if (Operation *user = use.getOwner()) res = isQuakeOperation(user) && !isa(user); diff --git a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp index bc5a2865773..c3e85b6b0de 100644 --- a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp +++ b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp @@ -127,16 +127,18 @@ Value quake::createConstantAlloca(PatternRewriter &builder, Location loc, auto newAlloca = [&]() { if (isa(result.getType()) && cast(result.getType()).hasSpecifiedSize()) { - return builder.create( - loc, cast(result.getType()).getSize()); + return quake::AllocaOp::create( + builder, loc, cast(result.getType()).getSize()); } auto constOp = cast(args[0].getDefiningOp()); - return builder.create( - loc, static_cast( - cast(constOp.getValue()).getInt())); + return quake::AllocaOp::create( + builder, loc, + static_cast( + cast(constOp.getValue()).getInt())); }(); - return builder.create( - loc, quake::VeqType::getUnsized(builder.getContext()), newAlloca); + return quake::RelaxSizeOp::create( + builder, loc, quake::VeqType::getUnsized(builder.getContext()), + newAlloca); } LogicalResult quake::AllocaOp::verify() { @@ -1175,15 +1177,16 @@ using EffectsVectorImpl = /// reference or value form. 
A operation with modeless effects is not removed /// when its result(s) is (are) unused. [[maybe_unused]] inline static void -getModelessEffectsImpl(EffectsVectorImpl &effects, ValueRange controls, - ValueRange targets) { - for (auto v : controls) - effects.emplace_back(MemoryEffects::Read::get(), v, +getModelessEffectsImpl(EffectsVectorImpl &effects, + MutableArrayRef controls, + MutableArrayRef targets) { + for (OpOperand &v : controls) + effects.emplace_back(MemoryEffects::Read::get(), &v, SideEffects::DefaultResource::get()); - for (auto v : targets) { - effects.emplace_back(MemoryEffects::Read::get(), v, + for (OpOperand &v : targets) { + effects.emplace_back(MemoryEffects::Read::get(), &v, SideEffects::DefaultResource::get()); - effects.emplace_back(MemoryEffects::Write::get(), v, + effects.emplace_back(MemoryEffects::Write::get(), &v, SideEffects::DefaultResource::get()); } } @@ -1195,36 +1198,37 @@ getModelessEffectsImpl(EffectsVectorImpl &effects, ValueRange controls, /// have both a read and write effect. If the operand is in value form, the /// operation introduces no effects on that operand. 
inline static void getModedEffectsImpl(EffectsVectorImpl &effects, - ValueRange controls, - ValueRange targets) { - for (auto v : controls) - if (isa(v.getType())) - effects.emplace_back(MemoryEffects::Read::get(), v, + MutableArrayRef controls, + MutableArrayRef targets) { + for (OpOperand &v : controls) + if (isa(v.get().getType())) + effects.emplace_back(MemoryEffects::Read::get(), &v, SideEffects::DefaultResource::get()); - for (auto v : targets) - if (isa(v.getType())) { - effects.emplace_back(MemoryEffects::Read::get(), v, + for (OpOperand &v : targets) + if (isa(v.get().getType())) { + effects.emplace_back(MemoryEffects::Read::get(), &v, SideEffects::DefaultResource::get()); - effects.emplace_back(MemoryEffects::Write::get(), v, + effects.emplace_back(MemoryEffects::Write::get(), &v, SideEffects::DefaultResource::get()); } } /// Quake reset has modeless effects. void quake::getResetEffectsImpl(EffectsVectorImpl &effects, - ValueRange targets) { + MutableArrayRef targets) { getModedEffectsImpl(effects, {}, targets); } /// Quake measurement operations have moded effects. void quake::getMeasurementEffectsImpl(EffectsVectorImpl &effects, - ValueRange targets) { + MutableArrayRef targets) { getModedEffectsImpl(effects, {}, targets); } /// Quake quantum operators have moded effects. 
void quake::getOperatorEffectsImpl(EffectsVectorImpl &effects, - ValueRange controls, ValueRange targets) { + MutableArrayRef controls, + MutableArrayRef targets) { getModedEffectsImpl(effects, controls, targets); } @@ -1262,7 +1266,5 @@ VERIFY_OPS(INSTANTIATE_LINEAR_TYPE_VERIFY) // Generated logic //===----------------------------------------------------------------------===// -using namespace cudaq; - #define GET_OP_CLASSES #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.cpp.inc" diff --git a/lib/Optimizer/Transforms/AddDeallocs.cpp b/lib/Optimizer/Transforms/AddDeallocs.cpp index f2dad559e0d..9ad8071a8c4 100644 --- a/lib/Optimizer/Transforms/AddDeallocs.cpp +++ b/lib/Optimizer/Transforms/AddDeallocs.cpp @@ -7,14 +7,17 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "mlir/IR/Dominance.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" +namespace cudaq::opt { +#define GEN_PASS_DEF_QUAKEADDDEALLOCS +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + #define DEBUG_TYPE "add-deallocs" using namespace mlir; @@ -135,7 +138,7 @@ inline void generateDeallocsForSet(PatternRewriter &rewriter, dyn_cast(*a->getUsers().begin())) v = initState; } - rewriter.create(a->getLoc(), v); + quake::DeallocOp::create(rewriter, a->getLoc(), v); } } @@ -144,7 +147,6 @@ template LogicalResult addDeallocations(OP wrapper, PatternRewriter &rewriter, const DeallocationAnalysisInfo &infoMap, const DominanceInfo &domInfo) { - rewriter.startRootUpdate(wrapper); llvm::DenseSet allocs; for (auto &[op, done] : infoMap.allocMap) if ((op->getParentOp() == wrapper.getOperation()) && !done) @@ -158,48 +160,49 @@ LogicalResult addDeallocations(OP wrapper, PatternRewriter &rewriter, 
LLVM_DEBUG(llvm::dbgs() << "adding deallocations to " << wrapper.getOperation() << '\n'); - // 1) Create an exit block to stick dealloc operations in. - auto *exitBlock = new Block; - exitBlock->addArguments( - wrapper.getResultTypes(), - SmallVector{wrapper.getNumResults(), wrapper.getLoc()}); - wrapper.getRegion().push_back(exitBlock); - - // 2) Update all the RET ops (at top level) to branches to the exit block - // when it is correct to do so. Otherwise, add the subset of deallocations - // inline before each RET op. - auto entireSetDominates = [&](RET ret) { - for (auto *alloc : allocs) - if (!domInfo.dominates(alloc, ret)) - return false; - return true; - }; - for (Block &block : wrapper.getRegion()) - for (Operation &op : block) - if (auto ret = dyn_cast(op)) { - if (entireSetDominates(ret)) { - // Replace the RET op with a branch to the shared deallocation block. - rewriter.setInsertionPoint(ret); - rewriter.replaceOpWithNewOp(ret, exitBlock, - ret.getOperands()); - } else { - // Collect only the subset that dominates this RET op. Insert the - // deallocations directly in front of the RET op. - llvm::DenseSet subset; - for (auto *alloc : allocs) - if (domInfo.dominates(alloc, ret)) - subset.insert(alloc); - rewriter.setInsertionPoint(ret); - generateDeallocsForSet(rewriter, subset); + rewriter.modifyOpInPlace(wrapper, [&]() { + // 1) Create an exit block to stick dealloc operations in. + auto *exitBlock = new Block; + exitBlock->addArguments( + wrapper.getResultTypes(), + SmallVector{wrapper.getNumResults(), wrapper.getLoc()}); + wrapper.getRegion().push_back(exitBlock); + + // 2) Update all the RET ops (at top level) to branches to the exit block + // when it is correct to do so. Otherwise, add the subset of deallocations + // inline before each RET op. 
+ auto entireSetDominates = [&](RET ret) { + for (auto *alloc : allocs) + if (!domInfo.dominates(alloc, ret)) + return false; + return true; + }; + for (Block &block : wrapper.getRegion()) + for (Operation &op : block) + if (auto ret = dyn_cast(op)) { + if (entireSetDominates(ret)) { + // Replace the RET op with a branch to the shared deallocation + // block. + rewriter.setInsertionPoint(ret); + rewriter.replaceOpWithNewOp(ret, exitBlock, + ret.getOperands()); + } else { + // Collect only the subset that dominates this RET op. Insert the + // deallocations directly in front of the RET op. + llvm::DenseSet subset; + for (auto *alloc : allocs) + if (domInfo.dominates(alloc, ret)) + subset.insert(alloc); + rewriter.setInsertionPoint(ret); + generateDeallocsForSet(rewriter, subset); + } } - } - - // 3) Create the deallocations. - rewriter.setInsertionPointToEnd(exitBlock); - generateDeallocsForSet(rewriter, allocs); - rewriter.create(wrapper.getLoc(), exitBlock->getArguments()); - rewriter.finalizeRootUpdate(wrapper); + // 3) Create the deallocations. + rewriter.setInsertionPointToEnd(exitBlock); + generateDeallocsForSet(rewriter, allocs); + RET::create(rewriter, wrapper.getLoc(), exitBlock->getArguments()); + }); LLVM_DEBUG(llvm::dbgs() << "updated " << wrapper.getOperation() << '\n'); return success(); } @@ -243,7 +246,7 @@ using ScopeDeallocPattern = /// dealloc ops along non-trivial control paths in the presence of global jumps. /// DeallocationAnalysis will flag any unwinding jumps as errors. 
class QuakeAddDeallocsPass - : public cudaq::opt::QuakeAddDeallocsBase { + : public cudaq::opt::impl::QuakeAddDeallocsBase { public: void runOnOperation() override { func::FuncOp funcOp = getOperation(); diff --git a/lib/Optimizer/Transforms/AddMeasurements.cpp b/lib/Optimizer/Transforms/AddMeasurements.cpp index 1b71702ae1a..e3bda7eec30 100644 --- a/lib/Optimizer/Transforms/AddMeasurements.cpp +++ b/lib/Optimizer/Transforms/AddMeasurements.cpp @@ -8,8 +8,6 @@ #include "PassDetails.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -83,26 +81,26 @@ addMeasurements(func::FuncOp funcOp, SmallVector &allocations, // Replace every func.return in the function with a branch to the new block. for (auto returnOp : returnsToReplace) { OpBuilder builder(returnOp); - builder.create(returnOp.getLoc(), newBlock, - returnOp.getOperands()); + cf::BranchOp::create(builder, returnOp.getLoc(), newBlock, + returnOp.getOperands()); returnOp.erase(); } // Set insertion point to the new block and add measurements builder.setInsertionPointToEnd(newBlock); auto measTy = quake::MeasureType::get(builder.getContext()); - for (auto &[index, alloca] : llvm::enumerate(allocations)) { + for (auto [index, alloca] : llvm::enumerate(allocations)) { if (isa(alloca->getResult(0).getType())) { auto stdvecTy = cudaq::cc::StdvecType::get(measTy); - builder.create(loc, stdvecTy, - ValueRange{alloca->getResult(0)}); + quake::MzOp::create(builder, loc, stdvecTy, + ValueRange{alloca->getResult(0)}); } else { - builder.create(loc, measTy, alloca->getResult(0)); + quake::MzOp::create(builder, loc, measTy, alloca->getResult(0)); } } // Add the final return using block arguments - builder.create(loc, newBlock->getArguments()); + func::ReturnOp::create(builder, loc, newBlock->getArguments()); return success(); } @@ 
-122,8 +120,8 @@ struct AddMeasurementsPass /// NOTE: Having a conditional on a measurement indicates that a measurement /// is present, however, it does not guarantee that all the allocated qubits /// are measured. - if (auto boolAttr = func->getAttr("qubitMeasurementFeedback") - .dyn_cast_or_null()) { + if (auto boolAttr = dyn_cast_if_present( + func->getAttr("qubitMeasurementFeedback"))) { if (boolAttr.getValue()) return; } diff --git a/lib/Optimizer/Transforms/AddMetadata.cpp b/lib/Optimizer/Transforms/AddMetadata.cpp index 873552f2245..bb356a952ac 100644 --- a/lib/Optimizer/Transforms/AddMetadata.cpp +++ b/lib/Optimizer/Transforms/AddMetadata.cpp @@ -8,22 +8,22 @@ #include "cudaq/Optimizer/Transforms/AddMetadata.h" #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" -using namespace mlir; +namespace cudaq::opt { +#define GEN_PASS_DEF_QUAKEADDMETADATA +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt #define DEBUG_TYPE "add-metadata" +using namespace mlir; + static cudaq::cc::AllocaOp seekAllocaFrom(Value v); static cudaq::cc::AllocaOp seekAllocaFrom(Operation *op) { @@ -155,7 +155,7 @@ namespace { /// This pass will analyze Quake functions and attach metadata (as an MLIR /// function attribute) for specific features. 
class QuakeAddMetadataPass - : public cudaq::opt::QuakeAddMetadataBase { + : public cudaq::opt::impl::QuakeAddMetadataBase { public: QuakeAddMetadataPass() = default; diff --git a/lib/Optimizer/Transforms/AggressiveInlining.cpp b/lib/Optimizer/Transforms/AggressiveInlining.cpp index 802477ae976..bf7066c2bbf 100644 --- a/lib/Optimizer/Transforms/AggressiveInlining.cpp +++ b/lib/Optimizer/Transforms/AggressiveInlining.cpp @@ -39,7 +39,7 @@ getConversionMap(ModuleOp module) { cudaq::runtime::mangledNameMap)) { for (auto namedAttr : mangledNameMap) { auto key = namedAttr.getName(); - auto val = namedAttr.getValue().cast().getValue(); + auto val = cast(namedAttr.getValue()).getValue(); result.insert({val, key}); } return result; @@ -68,9 +68,9 @@ class RewriteCall : public OpRewritePattern { auto loc = call.getLoc(); auto funcTy = call.getCalleeType(); cudaq::opt::factory::getOrAddFunc(loc, directName, funcTy, module); - rewriter.startRootUpdate(call); - call.setCalleeAttr(SymbolRefAttr::get(ctx, directName)); - rewriter.finalizeRootUpdate(call); + rewriter.modifyOpInPlace(call, [&]() { + call.setCalleeAttr(SymbolRefAttr::get(ctx, directName)); + }); LLVM_DEBUG(llvm::dbgs() << "Rewriting " << directName << '\n'); return success(); } @@ -93,7 +93,7 @@ class ConvertToDirectCalls LLVM_DEBUG(llvm::dbgs() << "Processing: " << module << '\n'); RewritePatternSet patterns(ctx); patterns.insert(ctx, *indirectMapOpt, module); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) signalPassFailure(); } } diff --git a/lib/Optimizer/Transforms/ApplyControlNegations.cpp b/lib/Optimizer/Transforms/ApplyControlNegations.cpp index 1d0885f5fe1..78915b17e2c 100644 --- a/lib/Optimizer/Transforms/ApplyControlNegations.cpp +++ b/lib/Optimizer/Transforms/ApplyControlNegations.cpp @@ -8,8 +8,6 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" -#include 
"cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "mlir/IR/PatternMatch.h" @@ -40,29 +38,26 @@ class ReplaceNegativeControl : public OpRewritePattern { for (auto negationIter : llvm::enumerate(negations.value())) if (negationIter.value()) - rewriter.create( - loc, ValueRange(), - ValueRange{op.getControls()[negationIter.index()]}); + quake::XOp::create(rewriter, loc, ValueRange(), + ValueRange{op.getControls()[negationIter.index()]}); if constexpr (std::is_same_v) { - rewriter.create( - loc, TypeRange{}, op.getIsAdjAttr(), op.getParameters(), + quake::ExpPauliOp::create( + rewriter, loc, TypeRange{}, op.getIsAdjAttr(), op.getParameters(), op.getControls(), op.getTargets(), op.getNegatedQubitControlsAttr(), op.getPauli(), op.getPauliLiteralAttr()); } else if constexpr (std::is_same_v) { - rewriter.create(loc, op.getGeneratorAttr(), op.getIsAdj(), - op.getParameters(), op.getControls(), - op.getTargets()); + Op::create(rewriter, loc, op.getGeneratorAttr(), op.getIsAdj(), + op.getParameters(), op.getControls(), op.getTargets()); } else { - rewriter.create(loc, op.getIsAdj(), op.getParameters(), - op.getControls(), op.getTargets()); + Op::create(rewriter, loc, op.getIsAdj(), op.getParameters(), + op.getControls(), op.getTargets()); } for (auto negationIter : llvm::enumerate(negations.value())) if (negationIter.value()) - rewriter.create( - loc, ValueRange(), - ValueRange{op.getControls()[negationIter.index()]}); + quake::XOp::create(rewriter, loc, ValueRange(), + ValueRange{op.getControls()[negationIter.index()]}); rewriter.eraseOp(op); return success(); diff --git a/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp b/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp index c428bfdec2a..4676d4588a3 100644 --- a/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp +++ b/lib/Optimizer/Transforms/ApplyOpSpecialization.cpp @@ -130,8 +130,8 @@ struct 
ApplyOpAnalysis { auto *ctx = newFunc.getContext(); OpBuilder builder(ctx); builder.setInsertionPoint(&newFunc.front().front()); - auto relax = builder.create( - newFunc.getLoc(), quake::VeqType::getUnsized(ctx), + auto relax = quake::RelaxSizeOp::create( + builder, newFunc.getLoc(), quake::VeqType::getUnsized(ctx), newFunc.front().getArgument(pos)); newFunc.front().getArgument(pos).replaceAllUsesExcept( relax.getResult(), relax.getOperation()); @@ -143,10 +143,10 @@ struct ApplyOpAnalysis { entry.push_front(c); module.push_back(newFunc); OpBuilder builder(apply); - auto newApply = builder.create( - apply.getLoc(), apply.getResultTypes(), - SymbolRefAttr::get(ctx, calleeName), apply.getIndirectCallee(), - apply.getIsAdj(), apply.getControls(), preservedArgs); + auto newApply = quake::ApplyOp::create( + builder, apply.getLoc(), apply.getResultTypes(), + SymbolRefAttr::get(ctx, calleeName), apply.getIsAdj(), + apply.getControls(), preservedArgs); apply->replaceAllUsesWith(newApply.getResults()); apply->dropAllReferences(); apply->erase(); @@ -318,8 +318,8 @@ struct ApplyOpPattern : public OpRewritePattern { auto unsizedVeqTy = quake::VeqType::getUnsized(ctx); SmallVector newArgs; if (!apply.getControls().empty()) { - auto consOp = rewriter.create( - apply.getLoc(), unsizedVeqTy, apply.getControls()); + auto consOp = quake::ConcatOp::create(rewriter, apply.getLoc(), + unsizedVeqTy, apply.getControls()); newArgs.push_back(consOp); } for (auto [v, toTy] : @@ -328,8 +328,8 @@ struct ApplyOpPattern : public OpRewritePattern { continue; Value arg = v; if (arg.getType() != toTy) - arg = - rewriter.create(apply.getLoc(), unsizedVeqTy, arg); + arg = quake::ConcatOp::create(rewriter, apply.getLoc(), unsizedVeqTy, + arg); newArgs.emplace_back(arg); } LLVM_DEBUG(llvm::dbgs() << "replacing: " << apply << '\n'); @@ -378,7 +378,7 @@ class ApplySpecializationPass auto *ctx = module.getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if 
(failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) signalPassFailure(); ApplyOpAnalysis analysis(module, constantPropagation); @@ -494,7 +494,7 @@ class ApplySpecializationPass // This is a quantum op. It should be updated with an additional control // argument, `newCond`. - auto arrAttr = op->getAttr(segmentSizes).cast(); + auto arrAttr = cast(op->getAttr(segmentSizes)); SmallVector arrRef{arrAttr.asArrayRef().begin(), arrAttr.asArrayRef().end()}; SmallVector operands(op->getOperands().begin(), @@ -518,9 +518,10 @@ class ApplySpecializationPass SmallVector newControls = {newCond}; newControls.append(apply.getControls().begin(), apply.getControls().end()); - auto newApply = builder.create( - apply.getLoc(), apply.getResultTypes(), apply.getCalleeAttr(), - apply.getIsAdjAttr(), newControls, apply.getActuals()); + auto newApply = quake::ApplyOp::create( + builder, apply.getLoc(), apply.getResultTypes(), + apply.getCalleeAttr(), apply.getIsAdjAttr(), newControls, + apply.getActuals()); apply->replaceAllUsesWith(newApply.getResults()); apply->erase(); } else if (isQuantumKernelCall(op)) { @@ -614,7 +615,7 @@ class ApplySpecializationPass static Value createIntConstant(OpBuilder &builder, Location loc, Type ty, std::int64_t val) { auto attr = builder.getIntegerAttr(ty, val); - return builder.create(loc, attr, ty); + return arith::ConstantOp::create(builder, loc, ty, attr); } /// Clone the LoopOp, \p loop, and return a new LoopOp that runs the loop @@ -640,31 +641,32 @@ class ApplySpecializationPass auto zero = createIntConstant(builder, loc, newStepVal.getType(), 0); if (!stepIsAnAddOp) { // Negate the step value when arith.subi. 
- newStepVal = builder.create(loc, zero, newStepVal); + newStepVal = arith::SubIOp::create(builder, loc, zero, newStepVal); } - Value iters = builder.create( - loc, newTermVal, loop.getInitialArgs()[loopComponents->induction]); + Value iters = + arith::SubIOp::create(builder, loc, newTermVal, + loop.getInitialArgs()[loopComponents->induction]); auto cmpOp = cast(loopComponents->compareOp); auto pred = cmpOp.getPredicate(); auto one = createIntConstant(builder, loc, iters.getType(), 1); if (cudaq::opt::isSemiOpenPredicate(pred)) { - Value negStepCond = builder.create( - loc, arith::CmpIPredicate::slt, newStepVal, zero); + Value negStepCond = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::slt, newStepVal, zero); auto negOne = createIntConstant(builder, loc, iters.getType(), -1); - Value adj = builder.create(loc, iters.getType(), - negStepCond, one, negOne); - iters = builder.create(loc, iters, adj); + Value adj = arith::SelectOp::create(builder, loc, iters.getType(), + negStepCond, one, negOne); + iters = arith::AddIOp::create(builder, loc, iters, adj); } - iters = builder.create(loc, iters, newStepVal); - iters = builder.create(loc, iters, newStepVal); - Value noLoopCond = builder.create( - loc, arith::CmpIPredicate::sgt, iters, zero); - iters = builder.create(loc, iters.getType(), noLoopCond, - iters, zero); - Value lastIter = builder.create(loc, iters, one); - Value nStep = builder.create(loc, lastIter, newStepVal); - Value newInitVal = - builder.create(loc, loopComponents->initialValue, nStep); + iters = arith::AddIOp::create(builder, loc, iters, newStepVal); + iters = arith::DivSIOp::create(builder, loc, iters, newStepVal); + Value noLoopCond = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::sgt, iters, zero); + iters = arith::SelectOp::create(builder, loc, iters.getType(), noLoopCond, + iters, zero); + Value lastIter = arith::SubIOp::create(builder, loc, iters, one); + Value nStep = arith::MulIOp::create(builder, loc, lastIter, 
newStepVal); + Value newInitVal = arith::AddIOp::create( + builder, loc, loopComponents->initialValue, nStep); // Create the list of input arguments to loop. We're going to add an // argument to the end that is the number of iterations left to execute. @@ -679,8 +681,9 @@ class ApplySpecializationPass // through the new argument. In the stepRegion, decrement the new argument // by 1 and convert the original step expression to be a negative step. IRRewriter rewriter(builder); - return rewriter.create( - loc, ValueRange{inputs}.getTypes(), inputs, /*postCondition=*/false, + return cudaq::cc::LoopOp::create( + rewriter, loc, ValueRange{inputs}.getTypes(), inputs, + /*postCondition=*/false, [&](OpBuilder &builder, Location loc, Region ®ion) { IRMapping dummyMap; loop.getWhileRegion().cloneInto(®ion, dummyMap); @@ -694,8 +697,8 @@ class ApplySpecializationPass Value trip = block.getArguments().back(); args.push_back(trip); auto zero = createIntConstant(builder, loc, trip.getType(), 0); - auto newCond = rewriter.create( - loc, arith::CmpIPredicate::sgt, trip, zero); + auto newCond = arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::sgt, trip, zero); rewriter.replaceOpWithNewOp(condOp, newCond, args); }, @@ -725,15 +728,15 @@ class ApplySpecializationPass auto *stepOp = contOp.getOperand(0).getDefiningOp(); auto newBump = [&]() -> Value { if (stepIsAnAddOp) - return rewriter.create( - loc, stepOp->getOperand(commuteTheAddOp ? 1 : 0), + return arith::SubIOp::create( + rewriter, loc, stepOp->getOperand(commuteTheAddOp ? 1 : 0), stepOp->getOperand(commuteTheAddOp ? 
0 : 1)); - return rewriter.create(loc, stepOp->getOperands()); + return arith::AddIOp::create(rewriter, loc, stepOp->getOperands()); }(); args[loopComponents->induction] = newBump; auto one = createIntConstant(rewriter, loc, iters.getType(), 1); - args.push_back(rewriter.create( - loc, entry.getArguments().back(), one)); + args.push_back(arith::SubIOp::create( + rewriter, loc, entry.getArguments().back(), one)); rewriter.replaceOpWithNewOp(contOp, args); }); } @@ -787,9 +790,10 @@ class ApplySpecializationPass mlir::UnitAttr newIsAdj = applyOp.getIsAdj() ? mlir::UnitAttr{} : mlir::UnitAttr::get(builder.getContext()); - builder.create( - applyOp.getLoc(), applyOp.getResultTypes(), applyOp.getCalleeAttr(), - newIsAdj, applyOp.getControls(), applyOp.getActuals()); + quake::ApplyOp::create(builder, applyOp.getLoc(), + applyOp.getResultTypes(), + applyOp.getCalleeAttr(), newIsAdj, + applyOp.getControls(), applyOp.getActuals()); applyOp->erase(); continue; } @@ -797,13 +801,13 @@ class ApplySpecializationPass bool opWasNegated = false; IRMapping mapper; LLVM_DEBUG(llvm::dbgs() << "moving quantum op: " << *op << ".\n"); - auto arrAttr = op->getAttr(segmentSizes).cast(); + auto arrAttr = cast(op->getAttr(segmentSizes)); // Walk over any floating-point parameters to `op` and negate them. 
for (auto iter = op->getOperands().begin(), endIter = op->getOperands().begin() + arrAttr[0]; iter != endIter; ++iter) { Value val = *iter; - Value neg = builder.create(loc, val.getType(), val); + Value neg = arith::NegFOp::create(builder, loc, val.getType(), val); mapper.map(val, neg); opWasNegated = true; } @@ -845,7 +849,7 @@ class ApplySpecializationPass auto *ctx = module.getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx, constantPropagation); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After apply specialization:\n" << module << "\n\n"); diff --git a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp index c8643a9d854..091a73b776d 100644 --- a/lib/Optimizer/Transforms/ArgumentSynthesis.cpp +++ b/lib/Optimizer/Transforms/ArgumentSynthesis.cpp @@ -122,10 +122,10 @@ class ArgumentSynthesisPass OpBuilder builder{ctx}; Block *splitBlock = entry.splitBlock(entry.begin()); builder.setInsertionPointToEnd(&entry); - builder.create(func.getLoc(), &subst.getBody().front()); + cf::BranchOp::create(builder, func.getLoc(), &subst.getBody().front()); Operation *lastOp = &subst.getBody().front().back(); builder.setInsertionPointToEnd(&subst.getBody().front()); - builder.create(func.getLoc(), splitBlock); + cf::BranchOp::create(builder, func.getLoc(), splitBlock); func.getBlocks().splice(Region::iterator{splitBlock}, subst.getBody().getBlocks()); if (lastOp && lastOp->getResult(0).getType() == @@ -152,8 +152,8 @@ class ArgumentSynthesisPass // substituted. Erasing the arguments changes the calling semantics and // breaks all calls to `func`. This practice is unnecessary and highly // discouraged. 
- if (changeSemantics) - func.eraseArguments(replacedArgs); + if (changeSemantics && failed(func.eraseArguments(replacedArgs))) + func->emitWarning("could not erase function arguments"); } } }; diff --git a/lib/Optimizer/Transforms/BasisConversion.cpp b/lib/Optimizer/Transforms/BasisConversion.cpp index 1515aecd7a1..15289f59960 100644 --- a/lib/Optimizer/Transforms/BasisConversion.cpp +++ b/lib/Optimizer/Transforms/BasisConversion.cpp @@ -7,10 +7,8 @@ ******************************************************************************/ #include "DecompositionPatterns.h" +#include "PassDetails.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Threading.h" @@ -18,16 +16,13 @@ #include "mlir/Rewrite/FrozenRewritePatternSet.h" #include "mlir/Transforms/DialectConversion.h" -using namespace mlir; - -//===----------------------------------------------------------------------===// -// Generated logic -//===----------------------------------------------------------------------===// namespace cudaq::opt { #define GEN_PASS_DEF_BASISCONVERSION #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt +using namespace mlir; + namespace { //===----------------------------------------------------------------------===// diff --git a/lib/Optimizer/Transforms/CableRoughIn.cpp b/lib/Optimizer/Transforms/CableRoughIn.cpp index 598f94d8091..b32e02937e4 100644 --- a/lib/Optimizer/Transforms/CableRoughIn.cpp +++ b/lib/Optimizer/Transforms/CableRoughIn.cpp @@ -8,10 +8,7 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" -#include 
"mlir/Dialect/Complex/IR/Complex.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -80,7 +77,7 @@ class CallPattern : public OpRewritePattern { for (auto arg : call.getOperands()) { Type argTy = arg.getType(); if (argTy == refTy) { - newArgs.push_back(rewriter.create(loc, wireTy, arg)); + newArgs.push_back(quake::UnwrapOp::create(rewriter, loc, wireTy, arg)); resultTys.push_back(wireTy); continue; } @@ -103,9 +100,9 @@ class CallPattern : public OpRewritePattern { SmallVector unwraps; for (auto carg : concat.getTargets()) unwraps.push_back( - rewriter.create(loc, wireTy, carg)); + quake::UnwrapOp::create(rewriter, loc, wireTy, carg)); newArgs.push_back( - rewriter.create(loc, cableTy, unwraps)); + quake::BundleCableOp::create(rewriter, loc, cableTy, unwraps)); resultTys.push_back(cableTy); continue; } @@ -121,7 +118,7 @@ class CallPattern : public OpRewritePattern { auto strArgTy = strArg.getType(); if (isa(strArgTy)) { unwraps.push_back( - rewriter.create(loc, wireTy, strArg)); + quake::UnwrapOp::create(rewriter, loc, wireTy, strArg)); cableSize++; continue; } @@ -142,7 +139,7 @@ class CallPattern : public OpRewritePattern { cableSize += concat.getTargets().size(); for (auto carg : concat.getTargets()) unwraps.push_back( - rewriter.create(loc, wireTy, carg)); + quake::UnwrapOp::create(rewriter, loc, wireTy, carg)); continue; } LLVM_DEBUG(llvm::dbgs() << strArg << " is not supported.\n"); @@ -150,7 +147,7 @@ class CallPattern : public OpRewritePattern { } auto cableTy = quake::CableType::get(ctx, cableSize); newArgs.push_back( - rewriter.create(loc, cableTy, unwraps)); + quake::BundleCableOp::create(rewriter, loc, cableTy, unwraps)); resultTys.push_back(cableTy); continue; } @@ -159,8 +156,8 @@ class CallPattern : public OpRewritePattern { } // Create a quake.call_by_ref operation. 
- auto callByRef = rewriter.create( - loc, resultTys, call.getCalleeAttr(), newArgs); + auto callByRef = quake::CallByRefOp::create( + rewriter, loc, call.getCalleeAttr(), resultTys, newArgs); // Wrap the wires and cables. std::size_t i = origCoarity; @@ -169,7 +166,7 @@ class CallPattern : public OpRewritePattern { for (auto arg : call.getOperands()) { Type argTy = arg.getType(); if (argTy == refTy) { - rewriter.create(loc, results[i++], arg); + quake::WrapOp::create(rewriter, loc, results[i++], arg); continue; } if (isa(argTy)) { @@ -181,11 +178,11 @@ class CallPattern : public OpRewritePattern { SmallVector wireTys(cableSize); std::fill(wireTys.begin(), wireTys.end(), wireTy); auto split = - rewriter.create(loc, wireTys, results[i++]); + quake::SplitCableOp::create(rewriter, loc, wireTys, results[i++]); SmallVector concatTargs{concat.getTargets().begin(), concat.getTargets().end()}; for (auto [j, wire] : llvm::enumerate(split.getResults())) - rewriter.create(loc, wire, concatTargs[j]); + quake::WrapOp::create(rewriter, loc, wire, concatTargs[j]); } if (isa(argTy)) { auto mkStruq = arg.getDefiningOp(); @@ -194,14 +191,14 @@ class CallPattern : public OpRewritePattern { SmallVector wireTys(cableSize); std::fill(wireTys.begin(), wireTys.end(), wireTy); auto split = - rewriter.create(loc, wireTys, results[i++]); + quake::SplitCableOp::create(rewriter, loc, wireTys, results[i++]); std::size_t j = 0; SmallVector splitResults{split.getResults().begin(), split.getResults().end()}; for (auto strArg : mkStruq.getVeqs()) { auto strArgTy = strArg.getType(); if (isa(strArgTy)) { - rewriter.create(loc, splitResults[j++], strArg); + quake::WrapOp::create(rewriter, loc, splitResults[j++], strArg); continue; } if (isa(strArgTy)) { @@ -211,8 +208,8 @@ class CallPattern : public OpRewritePattern { SmallVector concatTargs{concat.getTargets().begin(), concat.getTargets().end()}; for (std::size_t k = 0, K = concatTargs.size(); k < K; ++k) - rewriter.create(loc, splitResults[j++], - 
concatTargs[k]); + quake::WrapOp::create(rewriter, loc, splitResults[j++], + concatTargs[k]); continue; } LLVM_DEBUG(llvm::dbgs() << strArg << " is not supported.\n"); @@ -240,7 +237,7 @@ class CableRoughInPass patterns.insert(ctx); quake::ExtractRefOp::getCanonicalizationPatterns(patterns, ctx); quake::GetMemberOp::getCanonicalizationPatterns(patterns, ctx); - if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) + if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) signalPassFailure(); } }; diff --git a/lib/Optimizer/Transforms/ClassicalOptimization.cpp b/lib/Optimizer/Transforms/ClassicalOptimization.cpp index accc8b09b60..a17b68ad31c 100644 --- a/lib/Optimizer/Transforms/ClassicalOptimization.cpp +++ b/lib/Optimizer/Transforms/ClassicalOptimization.cpp @@ -81,7 +81,8 @@ class ClassicalOptimizationPass simplifyRegions(rewriter, op->getRegions()); } progress = 0; - (void)applyPatternsAndFoldGreedily(op, frozen); + if (failed(applyPatternsGreedily(op, frozen))) + break; } while (progress); } diff --git a/lib/Optimizer/Transforms/CombineMeasurements.cpp b/lib/Optimizer/Transforms/CombineMeasurements.cpp index 5065c8aa6b6..4d66e687dbc 100644 --- a/lib/Optimizer/Transforms/CombineMeasurements.cpp +++ b/lib/Optimizer/Transforms/CombineMeasurements.cpp @@ -9,14 +9,10 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" #include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "nlohmann/json.hpp" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -248,8 +244,8 @@ class CombineMeasurementsPass RewritePatternSet patterns(ctx); 
patterns.insert( ctx, analysis); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) { + if (failed( + applyPatternsGreedily(func.getOperation(), std::move(patterns)))) { func.emitOpError("Combining measurements failed"); signalPassFailure(); } diff --git a/lib/Optimizer/Transforms/CombineQuantumAlloc.cpp b/lib/Optimizer/Transforms/CombineQuantumAlloc.cpp index 29cfc8a3e9b..4ce925310a1 100644 --- a/lib/Optimizer/Transforms/CombineQuantumAlloc.cpp +++ b/lib/Optimizer/Transforms/CombineQuantumAlloc.cpp @@ -7,8 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -57,10 +55,11 @@ class AllocaPat : public OpRewritePattern { return success(); } if (isa(alloc.getType())) { - Value lo = rewriter.create( - alloc.getLoc(), os.first, rewriter.getI64Type()); - Value hi = rewriter.create( - alloc.getLoc(), os.first + os.second - 1, rewriter.getI64Type()); + Value lo = arith::ConstantIntOp::create( + rewriter, alloc.getLoc(), rewriter.getI64Type(), os.first); + Value hi = arith::ConstantIntOp::create(rewriter, alloc.getLoc(), + rewriter.getI64Type(), + os.first + os.second - 1); // trying to print alloc after the replace gives a segfault LLVM_DEBUG(llvm::dbgs() << "replace " << alloc); [[maybe_unused]] Value subveq = @@ -76,15 +75,15 @@ class AllocaPat : public OpRewritePattern { for (auto m : sty.getMembers()) { auto v = [&]() -> Value { if (isa(m)) { - auto result = rewriter.create( - loc, analysis.newAlloc, inner); + auto result = quake::ExtractRefOp::create( + rewriter, loc, analysis.newAlloc, inner); inner++; return result; } assert(cast(m).hasSpecifiedSize()); std::size_t dist = inner + cast(m).getSize() - 1; - auto result = 
rewriter.create( - loc, m, analysis.newAlloc, inner, dist); + auto result = quake::SubVeqOp::create( + rewriter, loc, m, analysis.newAlloc, inner, dist); inner = dist + 1; return result; }(); @@ -145,7 +144,7 @@ class CombineQuantumAllocationsPass OpBuilder rewriter(ctx); rewriter.setInsertionPointToStart(entryBlock); auto veqTy = quake::VeqType::get(ctx, currentOffset); - analysis.newAlloc = rewriter.create(loc, veqTy); + analysis.newAlloc = quake::AllocaOp::create(rewriter, loc, veqTy); // 3. Greedily replace the uses of the original alloca ops with uses of // partitions of the new alloca op. Replace subveq of subveq with a single @@ -158,8 +157,8 @@ class CombineQuantumAllocationsPass quake::GetMemberOp::getCanonicalizationPatterns(patterns, ctx); quake::SubVeqOp::getCanonicalizationPatterns(patterns, ctx); quake::ConcatOp::getCanonicalizationPatterns(patterns, ctx); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) { + if (failed(applyPatternsGreedily(func.getOperation(), + std::move(patterns)))) { func.emitOpError("combining alloca, subveq, and extract ops failed"); signalPassFailure(); } @@ -172,8 +171,8 @@ class CombineQuantumAllocationsPass for (auto &block : func.getRegion()) { if (block.hasNoSuccessors()) { rewriter.setInsertionPoint(block.getTerminator()); - rewriter.create(analysis.newAlloc.getLoc(), - analysis.newAlloc); + quake::DeallocOp::create(rewriter, analysis.newAlloc.getLoc(), + analysis.newAlloc); } } } diff --git a/lib/Optimizer/Transforms/ConstantPropagation.cpp b/lib/Optimizer/Transforms/ConstantPropagation.cpp index fd5fe25f2aa..58cab26148e 100644 --- a/lib/Optimizer/Transforms/ConstantPropagation.cpp +++ b/lib/Optimizer/Transforms/ConstantPropagation.cpp @@ -8,10 +8,7 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" -#include 
"mlir/Dialect/Complex/IR/Complex.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -114,8 +111,8 @@ class ForwardConstSubArray : public OpRewritePattern { } Type loadTy = loadSpan.getType(); auto arrayAttr = cast(attr); - Value newConArr = rewriter.create( - loadSpan.getLoc(), ty, arrayAttr); + Value newConArr = cudaq::cc::ConstantArrayOp::create( + rewriter, loadSpan.getLoc(), ty, arrayAttr); rewriter.replaceOpWithNewOp(loadSpan, loadTy, newConArr); return success(); @@ -193,24 +190,24 @@ class ForwardSingleDimensionData : public OpRewritePattern { auto loc = loadSpanEle.getLoc(); if (isa(loadTy)) { auto stringAttr = cast(attr); - auto lit = rewriter.create( - loc, cudaq::cc::PointerType::get(ty), stringAttr); - auto len = rewriter.create( - loc, stringAttr.getValue().size() + 1, 64); + auto lit = cudaq::cc::CreateStringLiteralOp::create( + rewriter, loc, cudaq::cc::PointerType::get(ty), stringAttr); + auto len = arith::ConstantIntOp::create( + rewriter, loc, stringAttr.getValue().size() + 1, 64); rewriter.replaceOpWithNewOp(loadSpanEle, loadTy, lit, len); return success(); } if (auto intTy = dyn_cast(loadTy)) { auto intAttr = cast(attr); - rewriter.replaceOpWithNewOp( - loadSpanEle, intAttr.getInt(), intTy); + rewriter.replaceOpWithNewOp(loadSpanEle, intTy, + intAttr.getInt()); return success(); } if (auto floatTy = dyn_cast(loadTy)) { auto floatAttr = cast(attr); - rewriter.replaceOpWithNewOp( - loadSpanEle, floatAttr.getValue(), floatTy); + rewriter.replaceOpWithNewOp(loadSpanEle, floatTy, + floatAttr.getValue()); return success(); } return failure(); @@ -231,8 +228,8 @@ class ConstantPropagationPass LLVM_DEBUG(llvm::dbgs() << "Before constant prop:\n" << func << '\n'); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) { + if (failed( + applyPatternsGreedily(func.getOperation(), std::move(patterns)))) { signalPassFailure(); return; } diff --git 
a/lib/Optimizer/Transforms/DeadStoreRemoval.cpp b/lib/Optimizer/Transforms/DeadStoreRemoval.cpp index 64d158cb518..dbc47a51afe 100644 --- a/lib/Optimizer/Transforms/DeadStoreRemoval.cpp +++ b/lib/Optimizer/Transforms/DeadStoreRemoval.cpp @@ -18,7 +18,7 @@ namespace cudaq::opt { #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt -#define DEBUG_TYPE "dsr" +#define DEBUG_TYPE "dead-store-removal" using namespace mlir; @@ -98,7 +98,7 @@ class DSRPass : public cudaq::opt::impl::DeadStoreRemovalBase { auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if (failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After erasure:\n" << *op << "\n\n"); } diff --git a/lib/Optimizer/Transforms/Decomposition.cpp b/lib/Optimizer/Transforms/Decomposition.cpp index cff76e3b32d..a94c239ce77 100644 --- a/lib/Optimizer/Transforms/Decomposition.cpp +++ b/lib/Optimizer/Transforms/Decomposition.cpp @@ -7,9 +7,8 @@ ******************************************************************************/ #include "DecompositionPatterns.h" +#include "PassDetails.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/PatternMatch.h" @@ -17,16 +16,13 @@ #include "mlir/Rewrite/FrozenRewritePatternSet.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -using namespace mlir; - -//===----------------------------------------------------------------------===// -// Generated logic -//===----------------------------------------------------------------------===// namespace cudaq::opt { #define GEN_PASS_DEF_DECOMPOSITION #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt +using namespace mlir; + namespace 
{ //===----------------------------------------------------------------------===// @@ -102,7 +98,7 @@ struct Decomposition // Process kernels in parallel LogicalResult rewriteResult = failableParallelForEach( module.getContext(), kernels, [&](Operation *op) { - LogicalResult converged = applyPatternsAndFoldGreedily(op, patterns); + LogicalResult converged = applyPatternsGreedily(op, patterns); // Decomposition is best-effort. Non-convergence is only a pass // failure if the user asked for convergence. diff --git a/lib/Optimizer/Transforms/DecompositionPatternSelection.cpp b/lib/Optimizer/Transforms/DecompositionPatternSelection.cpp index bde2025353e..1d8c3b734d9 100644 --- a/lib/Optimizer/Transforms/DecompositionPatternSelection.cpp +++ b/lib/Optimizer/Transforms/DecompositionPatternSelection.cpp @@ -7,34 +7,23 @@ ******************************************************************************/ #include "DecompositionPatterns.h" -#include "cudaq/Optimizer/Dialect/CC/CCDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" -#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" -#include "mlir/Dialect/Math/IR/Math.h" +#include "PassDetails.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringMap.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" #include -#include -#include -#include -#include -#include -#include #include #include #include -#include using namespace mlir; namespace { -//===----------------------------------------------------------------------===// // ConversionTarget and OperatorInfo, parsed from target basis strings such as // ["x", "x(1)", "z"] -//===----------------------------------------------------------------------===// - struct OperatorInfo { StringRef name; std::size_t numControls; @@ -136,8 +125,6 @@ struct hash { }; } // namespace std -namespace { - // Computes a hash of the given unordered set using the 
hashes of the elements // in the set. template @@ -150,6 +137,7 @@ std::size_t computeSetHash(const std::unordered_set &set) { return llvm::hash_combine_range(hashes.begin(), hashes.end()); } +namespace { //===----------------------------------------------------------------------===// // Decomposition Graph for Pattern Selection //===----------------------------------------------------------------------===// @@ -357,7 +345,6 @@ class DecompositionGraph { std::unordered_map> patternSelectionCache; }; - } // namespace std::unique_ptr diff --git a/lib/Optimizer/Transforms/DecompositionPatterns.cpp b/lib/Optimizer/Transforms/DecompositionPatterns.cpp index ed3ef43a874..1f5a1ca7614 100644 --- a/lib/Optimizer/Transforms/DecompositionPatterns.cpp +++ b/lib/Optimizer/Transforms/DecompositionPatterns.cpp @@ -6,6 +6,15 @@ * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ +#include "DecompositionPatterns.h" +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Factory.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/TypeName.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Rewrite/FrozenRewritePatternSet.h" + /** * This file contains the decomposition patterns that match single gates and * decompose them into a sequence of other gates. @@ -22,27 +31,10 @@ * macro can be used for this purpose instead. 
*/ -#include "DecompositionPatterns.h" -#include "cudaq/Optimizer/Builder/Factory.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Rewrite/FrozenRewritePatternSet.h" -#include -#include -#include -#include -#include -#include -#include - using namespace mlir; LLVM_INSTANTIATE_REGISTRY(cudaq::DecompositionPatternTypeRegistry) -namespace { - //===----------------------------------------------------------------------===// // Helpers //===----------------------------------------------------------------------===// @@ -55,23 +47,24 @@ inline Value createConstant(Location loc, double value, Type type, inline Value createConstant(Location loc, std::size_t value, PatternRewriter &rewriter) { - return rewriter.create(loc, value, 64); + return arith::ConstantIntOp::create(rewriter, loc, value, 64); } inline Value createDivF(Location loc, Value numerator, double denominator, PatternRewriter &rewriter) { auto denominatorValue = createConstant(loc, denominator, numerator.getType(), rewriter); - return rewriter.create(loc, numerator, denominatorValue); + return arith::DivFOp::create(rewriter, loc, numerator, denominatorValue); } /// @brief Returns true if \p op contains any `ControlType` operands. inline bool containsControlTypes(quake::OperatorInterface op) { return llvm::any_of(op.getControls(), [](const Value &v) { - return v.getType().isa(); + return isa(v.getType()); }); } +namespace { /// @brief This is a wrapper class for `PatternRewriter::create<>()` for /// `QuakeOperator`s. If the controls and targets are `quake::WireType`, then /// this wrapper class's methods update the controls and targets in the `create` @@ -85,7 +78,7 @@ class QuakeOperatorCreator { /// builder for cases when you have one input ValueRange. 
SmallVector getResultType(ValueRange operands) { std::size_t numOutputWires = llvm::count_if(operands, [](const Value &v) { - return v.getType().isa(); + return isa(v.getType()); }); return SmallVector(numOutputWires, @@ -98,9 +91,9 @@ class QuakeOperatorCreator { std::size_t numOutputWires = llvm::count_if( operands1, - [](const Value &v) { return v.getType().isa(); }) + + [](const Value &v) { return isa(v.getType()); }) + llvm::count_if(operands2, [](const Value &v) { - return v.getType().isa(); + return isa(v.getType()); }); return SmallVector(numOutputWires, @@ -112,7 +105,7 @@ class QuakeOperatorCreator { void selectWiresAndReplaceUses(Operation *op, ValueRange newValues) { SmallVector newWireValues; for (const auto &v : newValues) - if (v.getType().isa()) + if (isa(v.getType())) newWireValues.push_back(v); assert(op->getResults().size() == newWireValues.size() && "incorrect number of output wires provided"); @@ -125,9 +118,9 @@ class QuakeOperatorCreator { Value target) { SmallVector newWireValues; for (const auto &v : controls) - if (v.getType().isa()) + if (isa(v.getType())) newWireValues.push_back(v); - if (target.getType().isa()) + if (isa(target.getType())) newWireValues.push_back(target); assert(op->getResults().size() == newWireValues.size() && "incorrect number of output wires provided"); @@ -137,13 +130,12 @@ class QuakeOperatorCreator { template OpTy create(Location location, Value &target) { OpTy op; - op = rewriter.create(location, getResultType(target), false, - ValueRange{}, ValueRange{}, target, - DenseBoolArrayAttr{}); + op = OpTy::create(rewriter, location, getResultType(target), false, + ValueRange{}, ValueRange{}, target, DenseBoolArrayAttr{}); auto resultWires = op.getWires(); auto resultIt = resultWires.begin(); auto resultWiresEnd = resultWires.end(); - if (target.getType().isa() && resultIt != resultWiresEnd) + if (isa(target.getType()) && resultIt != resultWiresEnd) target = *resultIt; return op; } @@ -151,13 +143,12 @@ class 
QuakeOperatorCreator { template OpTy create(Location location, bool is_adj, Value &target) { OpTy op; - op = rewriter.create(location, getResultType(target), is_adj, - ValueRange{}, ValueRange{}, target, - DenseBoolArrayAttr{}); + op = OpTy::create(rewriter, location, getResultType(target), is_adj, + ValueRange{}, ValueRange{}, target, DenseBoolArrayAttr{}); auto resultWires = op.getWires(); auto resultIt = resultWires.begin(); auto resultWiresEnd = resultWires.end(); - if (target.getType().isa() && resultIt != resultWiresEnd) + if (isa(target.getType()) && resultIt != resultWiresEnd) target = *resultIt; return op; } @@ -165,15 +156,14 @@ class QuakeOperatorCreator { template OpTy create(Location location, Value &control, Value &target) { OpTy op; - op = rewriter.create(location, getResultType(control, target), false, - ValueRange{}, control, target, - DenseBoolArrayAttr{}); + op = OpTy::create(rewriter, location, getResultType(control, target), false, + ValueRange{}, control, target, DenseBoolArrayAttr{}); auto resultWires = op.getWires(); auto resultIt = resultWires.begin(); auto resultWiresEnd = resultWires.end(); - if (control.getType().isa() && resultIt != resultWiresEnd) + if (isa(control.getType()) && resultIt != resultWiresEnd) control = *resultIt++; - if (target.getType().isa() && resultIt != resultWiresEnd) + if (isa(target.getType()) && resultIt != resultWiresEnd) target = *resultIt; return op; } @@ -182,16 +172,16 @@ class QuakeOperatorCreator { OpTy create(Location location, bool is_adj, ValueRange parameters, SmallVectorImpl &controls, Value &target) { OpTy op; - op = rewriter.create(location, getResultType(controls, target), - is_adj, parameters, controls, target, - DenseBoolArrayAttr{}); + op = OpTy::create(rewriter, location, getResultType(controls, target), + is_adj, parameters, controls, target, + DenseBoolArrayAttr{}); auto resultWires = op.getWires(); auto resultIt = resultWires.begin(); auto resultWiresEnd = resultWires.end(); for (auto &c : 
controls) - if (c.getType().isa() && resultIt != resultWiresEnd) + if (isa(c.getType()) && resultIt != resultWiresEnd) c = *resultIt++; - if (target.getType().isa() && resultIt != resultWiresEnd) + if (isa(target.getType()) && resultIt != resultWiresEnd) target = *resultIt; return op; } @@ -200,16 +190,16 @@ class QuakeOperatorCreator { OpTy create(Location location, ValueRange parameters, SmallVectorImpl &controls, Value &target) { OpTy op; - op = rewriter.create(location, getResultType(controls, target), false, - parameters, controls, target, - DenseBoolArrayAttr{}); + op = + OpTy::create(rewriter, location, getResultType(controls, target), false, + parameters, controls, target, DenseBoolArrayAttr{}); auto resultWires = op.getWires(); auto resultIt = resultWires.begin(); auto resultWiresEnd = resultWires.end(); for (auto &c : controls) - if (c.getType().isa() && resultIt != resultWiresEnd) + if (isa(c.getType()) && resultIt != resultWiresEnd) c = *resultIt++; - if (target.getType().isa() && resultIt != resultWiresEnd) + if (isa(target.getType()) && resultIt != resultWiresEnd) target = *resultIt; return op; } @@ -218,16 +208,16 @@ class QuakeOperatorCreator { OpTy create(Location location, SmallVectorImpl &controls, Value &target) { OpTy op; - op = rewriter.create(location, getResultType(controls, target), false, - ValueRange{}, controls, target, - DenseBoolArrayAttr{}); + op = + OpTy::create(rewriter, location, getResultType(controls, target), false, + ValueRange{}, controls, target, DenseBoolArrayAttr{}); auto resultWires = op.getWires(); auto resultIt = resultWires.begin(); auto resultWiresEnd = resultWires.end(); for (auto &c : controls) - if (c.getType().isa() && resultIt != resultWiresEnd) + if (isa(c.getType()) && resultIt != resultWiresEnd) c = *resultIt++; - if (target.getType().isa() && resultIt != resultWiresEnd) + if (isa(target.getType()) && resultIt != resultWiresEnd) target = *resultIt; return op; } @@ -235,14 +225,14 @@ class QuakeOperatorCreator { 
template OpTy create(Location location, SmallVectorImpl &targets) { OpTy op; - op = rewriter.create(location, getResultType(targets), false, - ValueRange{}, ValueRange{}, targets, - DenseBoolArrayAttr{}); + op = + OpTy::create(rewriter, location, getResultType(targets), false, + ValueRange{}, ValueRange{}, targets, DenseBoolArrayAttr{}); auto resultWires = op.getWires(); auto resultIt = resultWires.begin(); auto resultWiresEnd = resultWires.end(); for (auto &t : targets) - if (t.getType().isa() && resultIt != resultWiresEnd) + if (isa(t.getType()) && resultIt != resultWiresEnd) t = *resultIt++; return op; } @@ -250,13 +240,14 @@ class QuakeOperatorCreator { private: PatternRewriter &rewriter; }; +} // namespace /// Check whether the operation has the correct number of controls. /// /// Note: This function assumes that the operation has already been tested for /// reference semantics. -LogicalResult checkNumControls(quake::OperatorInterface op, - std::size_t requiredNumControls) { +static LogicalResult checkNumControls(quake::OperatorInterface op, + std::size_t requiredNumControls) { auto opControls = op.getControls(); if (opControls.size() > requiredNumControls) return failure(); @@ -283,9 +274,9 @@ LogicalResult checkNumControls(quake::OperatorInterface op, /// /// Note: This function assumes that the operation has already been tested for /// reference semantics. 
-LogicalResult checkAndExtractControls(quake::OperatorInterface op, - MutableArrayRef controls, - PatternRewriter &rewriter) { +static LogicalResult checkAndExtractControls(quake::OperatorInterface op, + MutableArrayRef controls, + PatternRewriter &rewriter) { if (failed(checkNumControls(op, controls.size()))) return failure(); @@ -295,7 +286,7 @@ LogicalResult checkAndExtractControls(quake::OperatorInterface op, for (std::size_t i = 0, end = veq.getSize(); i < end; ++i) { Value index = createConstant(op.getLoc(), i, rewriter); Value qref = - rewriter.create(op.getLoc(), control, index); + quake::ExtractRefOp::create(rewriter, op.getLoc(), control, index); controls[controlIndex] = qref; controlIndex += 1; } @@ -317,6 +308,7 @@ LogicalResult checkAndExtractControls(quake::OperatorInterface op, /// "target2", ...) /// where "source_op" is the operation that the pattern matches and /// {"target1", "target2", ...} are the operations that the pattern may produce. +#undef REGISTER_DECOMPOSITION_PATTERN #define REGISTER_DECOMPOSITION_PATTERN(PATTERN, SOURCE_OP, ...) 
\ struct PATTERN##Type : public cudaq::DecompositionPatternType { \ using cudaq::DecompositionPatternType::DecompositionPatternType; \ @@ -345,11 +337,11 @@ LogicalResult checkAndExtractControls(quake::OperatorInterface op, // HOp decompositions //===----------------------------------------------------------------------===// +namespace { // quake.h target // ─────────────────────────────────── // quake.phased_rx(π/2, π/2) target // quake.phased_rx(π, 0) target - struct HToPhasedRxType; // forward declare the pattern type, defined in the // macro below struct HToPhasedRx @@ -407,7 +399,7 @@ struct ExpPauliDecomposition auto pauliWord = expPauliOp.getPauli(); if (expPauliOp.isAdj()) - theta = rewriter.create(loc, theta); + theta = arith::NegFOp::create(rewriter, loc, theta); std::optional optPauliWordStr; if (!pauliWord) { @@ -503,19 +495,19 @@ struct ExpPauliDecomposition SmallVector qubitSupport; for (std::size_t i = 0; i < size; i++) { - Value index = rewriter.create(loc, i, 64); - Value qubitI = rewriter.create(loc, qubits, index); + Value index = arith::ConstantIntOp::create(rewriter, loc, i, 64); + Value qubitI = quake::ExtractRefOp::create(rewriter, loc, qubits, index); if (pauliWordStr[i] != 'I') qubitSupport.push_back(qubitI); if (pauliWordStr[i] == 'Y') { APFloat d(M_PI_2); - Value param = rewriter.create( - loc, d, rewriter.getF64Type()); - rewriter.create(loc, ValueRange{param}, ValueRange{}, - ValueRange{qubitI}); + Value param = arith::ConstantFloatOp::create(rewriter, loc, + rewriter.getF64Type(), d); + quake::RxOp::create(rewriter, loc, ValueRange{param}, ValueRange{}, + ValueRange{qubitI}); } else if (pauliWordStr[i] == 'X') { - rewriter.create(loc, ValueRange{qubitI}); + quake::HOp::create(rewriter, loc, ValueRange{qubitI}); } } @@ -529,34 +521,35 @@ struct ExpPauliDecomposition std::vector> toReverse; for (std::size_t i = 0; i < qubitSupport.size() - 1; i++) { - rewriter.create(loc, ValueRange{qubitSupport[i]}, - ValueRange{qubitSupport[i + 1]}); + 
quake::XOp::create(rewriter, loc, ValueRange{qubitSupport[i]}, + ValueRange{qubitSupport[i + 1]}); toReverse.emplace_back(qubitSupport[i], qubitSupport[i + 1]); } // Note: `Rz(theta)` = `exp(-i*theta/2 Z)` - Value negTwoTheta = rewriter.create( - loc, createConstant(loc, -2.0, rewriter.getF64Type(), rewriter), theta); - rewriter.create(loc, ValueRange{negTwoTheta}, ValueRange{}, - ValueRange{qubitSupport.back()}); + Value negTwoTheta = arith::MulFOp::create( + rewriter, loc, + createConstant(loc, -2.0, rewriter.getF64Type(), rewriter), theta); + quake::RzOp::create(rewriter, loc, ValueRange{negTwoTheta}, ValueRange{}, + ValueRange{qubitSupport.back()}); std::reverse(toReverse.begin(), toReverse.end()); for (auto &[i, j] : toReverse) - rewriter.create(loc, ValueRange{i}, ValueRange{j}); + quake::XOp::create(rewriter, loc, ValueRange{i}, ValueRange{j}); for (std::size_t i = 0; i < pauliWordStr.size(); i++) { std::size_t k = pauliWordStr.size() - 1 - i; - Value index = rewriter.create(loc, k, 64); - Value qubitK = rewriter.create(loc, qubits, index); + Value index = arith::ConstantIntOp::create(rewriter, loc, k, 64); + Value qubitK = quake::ExtractRefOp::create(rewriter, loc, qubits, index); if (pauliWordStr[k] == 'Y') { APFloat d(-M_PI_2); - Value param = rewriter.create( - loc, d, rewriter.getF64Type()); - rewriter.create(loc, ValueRange{param}, ValueRange{}, - ValueRange{qubitK}); + Value param = arith::ConstantFloatOp::create(rewriter, loc, + rewriter.getF64Type(), d); + quake::RxOp::create(rewriter, loc, ValueRange{param}, ValueRange{}, + ValueRange{qubitK}); } else if (pauliWordStr[k] == 'X') { - rewriter.create(loc, ValueRange{qubitK}); + quake::HOp::create(rewriter, loc, ValueRange{qubitK}); } } @@ -633,7 +626,7 @@ struct R1AdjToR1 Location loc = op->getLoc(); Value target = op.getTarget(); Value angle = op.getParameter(); - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector 
noControls; @@ -752,7 +745,7 @@ struct SToPhasedRx SmallVector noControls; Value zero = createConstant(loc, 0.0, rewriter.getF64Type(), rewriter); Value pi_2 = createConstant(loc, M_PI_2, rewriter.getF64Type(), rewriter); - Value negPi_2 = rewriter.create(loc, pi_2); + Value negPi_2 = arith::NegFOp::create(rewriter, loc, pi_2); Value angle = op.isAdj() ? pi_2 : negPi_2; @@ -830,13 +823,13 @@ struct TToPhasedRx Value target = op.getTarget(); Value angle = createConstant(loc, -M_PI_4, rewriter.getF64Type(), rewriter); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector noControls; Value zero = createConstant(loc, 0.0, rewriter.getF64Type(), rewriter); Value pi_2 = createConstant(loc, M_PI_2, rewriter.getF64Type(), rewriter); - Value negPi_2 = rewriter.create(loc, pi_2); + Value negPi_2 = arith::NegFOp::create(rewriter, loc, pi_2); std::array parameters = {pi_2, zero}; QuakeOperatorCreator qRewriter(rewriter); @@ -1234,7 +1227,7 @@ struct ZToPhasedRx Value zero = createConstant(loc, 0.0, rewriter.getF64Type(), rewriter); Value negPi = createConstant(loc, -M_PI, rewriter.getF64Type(), rewriter); Value pi_2 = createConstant(loc, M_PI_2, rewriter.getF64Type(), rewriter); - Value negPi_2 = rewriter.create(loc, pi_2); + Value negPi_2 = arith::NegFOp::create(rewriter, loc, pi_2); std::array parameters = {pi_2, zero}; QuakeOperatorCreator qRewriter(rewriter); @@ -1289,12 +1282,12 @@ struct CR1ToCX : public cudaq::DecompositionPattern { negControl = (*negatedControls)[0]; if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector noControls; Value halfAngle = createDivF(loc, angle, 2.0, rewriter); - Value negHalfAngle = rewriter.create(loc, halfAngle); + Value negHalfAngle = arith::NegFOp::create(rewriter, loc, halfAngle); QuakeOperatorCreator qRewriter(rewriter); 
qRewriter.create(loc, /*isAdj*/ negControl, halfAngle, @@ -1334,15 +1327,15 @@ struct R1ToPhasedRx Value target = op.getTarget(); Value angle = op.getParameter(); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); Type angleType = op.getParameter().getType(); // Necessary/Helpful constants SmallVector noControls; Value zero = createConstant(loc, 0.0, angleType, rewriter); Value pi_2 = createConstant(loc, M_PI_2, angleType, rewriter); - Value negPi_2 = rewriter.create(loc, pi_2); - Value negAngle = rewriter.create(loc, angle); + Value negPi_2 = arith::NegFOp::create(rewriter, loc, pi_2); + Value negAngle = arith::NegFOp::create(rewriter, loc, angle); std::array parameters = {pi_2, zero}; QuakeOperatorCreator qRewriter(rewriter); @@ -1396,13 +1389,13 @@ struct CRxToCX : public cudaq::DecompositionPattern { Value angle = op.getParameter(); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); Type angleType = op.getParameter().getType(); // Necessary/Helpful constants SmallVector noControls; Value halfAngle = createDivF(loc, angle, 2.0, rewriter); - Value negHalfAngle = rewriter.create(loc, halfAngle); + Value negHalfAngle = arith::NegFOp::create(rewriter, loc, halfAngle); Value negPI_2 = createConstant(loc, -M_PI_2, angleType, rewriter); QuakeOperatorCreator qRewriter(rewriter); @@ -1442,7 +1435,7 @@ struct RxToPhasedRx Value target = op.getTarget(); Value angle = op.getParameter(); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); Type angleType = op.getParameter().getType(); // Necessary/Helpful constants @@ -1482,7 +1475,7 @@ struct RxAdjToRx Location loc = op->getLoc(); Value target = op.getTarget(); Value angle = op.getParameter(); - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector noControls; @@ -1530,12 
+1523,12 @@ struct CRyToCX : public cudaq::DecompositionPattern { Value angle = op.getParameter(); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector noControls; Value halfAngle = createDivF(loc, angle, 2.0, rewriter); - Value negHalfAngle = rewriter.create(loc, halfAngle); + Value negHalfAngle = arith::NegFOp::create(rewriter, loc, halfAngle); QuakeOperatorCreator qRewriter(rewriter); qRewriter.create(loc, halfAngle, noControls, target); @@ -1571,7 +1564,7 @@ struct RyToPhasedRx Value target = op.getTarget(); Value angle = op.getParameter(); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); Type angleType = op.getParameter().getType(); // Necessary/Helpful constants @@ -1611,7 +1604,7 @@ struct RyAdjToRy Location loc = op->getLoc(); Value target = op.getTarget(); Value angle = op.getParameter(); - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector noControls; @@ -1659,12 +1652,12 @@ struct CRzToCX : public cudaq::DecompositionPattern { Value angle = op.getParameter(); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector noControls; Value halfAngle = createDivF(loc, angle, 2.0, rewriter); - Value negHalfAngle = rewriter.create(loc, halfAngle); + Value negHalfAngle = arith::NegFOp::create(rewriter, loc, halfAngle); QuakeOperatorCreator qRewriter(rewriter); qRewriter.create(loc, halfAngle, noControls, target); @@ -1702,15 +1695,15 @@ struct RzToPhasedRx Value target = op.getTarget(); Value angle = op.getParameter(); if (op.isAdj()) - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); Type angleType = op.getParameter().getType(); // Necessary/Helpful constants SmallVector noControls; 
Value zero = createConstant(loc, 0.0, angleType, rewriter); Value pi_2 = createConstant(loc, M_PI_2, angleType, rewriter); - Value negPi_2 = rewriter.create(loc, pi_2); - Value negAngle = rewriter.create(loc, angle); + Value negPi_2 = arith::NegFOp::create(rewriter, loc, pi_2); + Value negAngle = arith::NegFOp::create(rewriter, loc, angle); std::array parameters = {pi_2, zero}; QuakeOperatorCreator qRewriter(rewriter); @@ -1751,7 +1744,7 @@ struct RzAdjToRz Location loc = op->getLoc(); Value target = op.getTarget(); Value angle = op.getParameter(); - angle = rewriter.create(loc, angle); + angle = arith::NegFOp::create(rewriter, loc, angle); // Necessary/Helpful constants SmallVector noControls; @@ -1796,17 +1789,17 @@ struct U3ToRotations Value lam = op.getParameters()[2]; if (op.isAdj()) { - theta = rewriter.create(loc, theta); + theta = arith::NegFOp::create(rewriter, loc, theta); // swap the 2nd and 3rd parameter for correctness std::swap(phi, lam); - phi = rewriter.create(loc, phi); - lam = rewriter.create(loc, lam); + phi = arith::NegFOp::create(rewriter, loc, phi); + lam = arith::NegFOp::create(rewriter, loc, lam); } // Necessary/Helpful constants Type angleType = op.getParameter().getType(); Value pi_2 = createConstant(loc, M_PI_2, angleType, rewriter); - Value negPi_2 = rewriter.create(loc, pi_2); + Value negPi_2 = arith::NegFOp::create(rewriter, loc, pi_2); QuakeOperatorCreator qRewriter(rewriter); qRewriter.create(loc, lam, controls, target); diff --git a/lib/Optimizer/Transforms/DecompositionPatterns.h b/lib/Optimizer/Transforms/DecompositionPatterns.h index 20b402abd5e..1cad9d3fb9d 100644 --- a/lib/Optimizer/Transforms/DecompositionPatterns.h +++ b/lib/Optimizer/Transforms/DecompositionPatterns.h @@ -8,6 +8,7 @@ #pragma once +#define LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING 1 #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Registry.h" #include "mlir/IR/PatternMatch.h" @@ -28,8 +29,13 @@ namespace cudaq { /// system. 
Stores the pattern metadata and provides a factory method to create /// new instances of the pattern. /// +/// Register decomposition patterns using +/// CUDAQ_REGISTER_TYPE(cudaq::DecompositionPatternType, MyPatternType, +/// pattern_name) +/// where pattern_name is the same as MyPatternType().getPatternName(). class DecompositionPatternType { public: + using RegistryType = llvm::Registry; virtual ~DecompositionPatternType() = default; /// Get the source operation this pattern matches and decomposes. @@ -102,3 +108,9 @@ createBasisTarget(mlir::MLIRContext &context, using DecompositionPatternTypeRegistry = llvm::Registry; } // namespace cudaq + +/// Register a decomposition pattern type with the LLVM registry. +/// This is compiler-internal only (no cross-DSO / Python concerns). +#define REGISTER_DECOMPOSITION_PATTERN(SUBTYPE, NAME) \ + static cudaq::DecompositionPatternType::RegistryType::Add \ + decomp_reg_##NAME(#NAME, ""); diff --git a/lib/Optimizer/Transforms/DependencyAnalysis.cpp b/lib/Optimizer/Transforms/DependencyAnalysis.cpp index 580dd6d4a86..9951d8d02db 100644 --- a/lib/Optimizer/Transforms/DependencyAnalysis.cpp +++ b/lib/Optimizer/Transforms/DependencyAnalysis.cpp @@ -16,6 +16,14 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" +//===----------------------------------------------------------------------===// +// Generated logic +//===----------------------------------------------------------------------===// +namespace cudaq::opt { +#define GEN_PASS_DEF_DEPENDENCYANALYSIS +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + #define DEBUG_TYPE "dep-analysis" using namespace mlir; @@ -25,14 +33,6 @@ using namespace mlir; #define RAW_GATE_OPS GATE_OPS(RAW) #define RAW_QUANTUM_OPS QUANTUM_OPS(RAW) -//===----------------------------------------------------------------------===// -// Generated logic -//===----------------------------------------------------------------------===// -namespace 
cudaq::opt { -#define GEN_PASS_DEF_DEPENDENCYANALYSIS -#include "cudaq/Optimizer/Transforms/Passes.h.inc" -} // namespace cudaq::opt - namespace { // TODO: Someday, it would probably make sense to make VirtualQIDs and // PhysicalQIDs be data structures with metadata, not just integer @@ -653,8 +653,8 @@ class InitDependencyNode : public DependencyNode { assert(qubit.has_value() && "Trying to codeGen a virtual allocation " "without a physical qubit assigned!"); auto wirety = quake::WireType::get(builder.getContext()); - auto alloc = builder.create( - builder.getUnknownLoc(), wirety, + auto alloc = quake::BorrowWireOp::create( + builder, builder.getUnknownLoc(), wirety, cudaq::opt::topologyAgnosticWiresetName, qubit.value()); wire = alloc.getResult(); hasCodeGen = true; @@ -760,13 +760,13 @@ class OpDependencyNode : public DependencyNode { std::string getOpName() override { if (isa(associated)) { if (auto cstf = dyn_cast(associated)) { - auto value = cstf.getValue().cast().getValueAsDouble(); + auto value = cast(cstf.getValue()).getValueAsDouble(); return std::to_string(value); } else if (auto cstidx = dyn_cast(associated)) { - auto value = cstidx.getValue().cast().getInt(); + auto value = cast(cstidx.getValue()).getInt(); return std::to_string(value); } else if (auto cstint = dyn_cast(associated)) { - auto value = cstint.getValue().cast().getInt(); + auto value = cast(cstint.getValue()).getInt(); return std::to_string(value); } } @@ -800,9 +800,9 @@ class OpDependencyNode : public DependencyNode { auto oldOp = associated; auto operands = gatherOperands(builder); - associated = - Operation::create(oldOp->getLoc(), oldOp->getName(), - oldOp->getResultTypes(), operands, oldOp->getAttrs()); + associated = Operation::create( + oldOp->getLoc(), oldOp->getName(), oldOp->getResultTypes(), operands, + oldOp->getAttrs(), OpaqueProperties{nullptr}); associated->removeAttr("dnodeid"); builder.insert(associated); } @@ -1710,7 +1710,7 @@ class RootDependencyNode : public 
OpDependencyNode { void genOp(OpBuilder &builder) override { auto wire = dependencies[0].getValue(); auto newOp = - builder.create(builder.getUnknownLoc(), wire); + quake::ReturnWireOp::create(builder, builder.getUnknownLoc(), wire); newOp->setAttrs(associated->getAttrs()); newOp->removeAttr("dnodeid"); associated = newOp; @@ -2605,7 +2605,7 @@ class IfDependencyNode : public OpDependencyNode { } auto newIf = - builder.create(oldOp->getLoc(), results, operands); + cudaq::cc::IfOp::create(builder, oldOp->getLoc(), results, operands); auto *then_region = &newIf.getThenRegion(); then_block->codeGen(builder, then_region); @@ -3137,7 +3137,7 @@ class DependencyAnalysisEngine { // and thus should have a memoized dnode for defOp, fail if not assert(defOp->hasAttr("dnodeid") && "No dnodeid found for operation"); - auto id = defOp->getAttr("dnodeid").cast().getUInt(); + auto id = cast(defOp->getAttr("dnodeid")).getUInt(); auto dnode = perOp[id]; if (!ifStack.empty() && defOp->getParentOp() != ifStack.back() && diff --git a/lib/Optimizer/Transforms/DistributedDeviceCall.cpp b/lib/Optimizer/Transforms/DistributedDeviceCall.cpp index 1e944626f8f..279b7328a0c 100644 --- a/lib/Optimizer/Transforms/DistributedDeviceCall.cpp +++ b/lib/Optimizer/Transforms/DistributedDeviceCall.cpp @@ -13,6 +13,7 @@ #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" +#include "llvm/Support/MD5.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeSupport.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -78,10 +79,9 @@ class QIRVendorDeviceCallPat // Error code 2 is used to indicate illegal execution of unreachable // code. 
Value errorCodeTwo = - rewriter.create(devcall.getLoc(), 2, 64); - rewriter.create(devcall.getLoc(), TypeRange{}, - cudaq::opt::QISTrap, - ValueRange{errorCodeTwo}); + arith::ConstantIntOp::create(rewriter, devcall.getLoc(), 2, 64); + func::CallOp::create(rewriter, devcall.getLoc(), TypeRange{}, + cudaq::opt::QISTrap, ValueRange{errorCodeTwo}); // For return (after the trap), load from nullptr to create return value // of the same type as the device function, i.e., `return *(T*)nullptr;` // for return type `T`. @@ -90,18 +90,18 @@ class QIRVendorDeviceCallPat // function. SmallVector trapResults; for (Type resTy : devFunc.getFunctionType().getResults()) { - auto nullPtr = rewriter.create( - devcall.getLoc(), + auto nullPtr = arith::ConstantOp::create( + rewriter, devcall.getLoc(), rewriter.getZeroAttr(rewriter.getIntegerType(64))); auto ptrTy = cudaq::cc::PointerType::get(resTy); - auto castedNullPtr = rewriter.create( - devcall.getLoc(), ptrTy, nullPtr); - auto loadedVal = rewriter.create(devcall.getLoc(), - castedNullPtr); + auto castedNullPtr = cudaq::cc::CastOp::create( + rewriter, devcall.getLoc(), ptrTy, nullPtr); + auto loadedVal = cudaq::cc::LoadOp::create(rewriter, devcall.getLoc(), + castedNullPtr); trapResults.push_back(loadedVal); } - rewriter.create(devcall.getLoc(), trapResults); + func::ReturnOp::create(rewriter, devcall.getLoc(), trapResults); } // (2) Set this trap function as private and weak_odr linkage, to allow // multiple definitions across translation units without linker errors. @@ -123,7 +123,7 @@ class QIRVendorDeviceCallPat // weak_odr linkage. 
rewriter.replaceOpWithNewOp( devcall, devFunc.getFunctionType().getResults(), devFuncName, - devcall.getArgs()); + devcall.getArgs(), ArrayAttr{}, ArrayAttr{}); return success(); } @@ -167,8 +167,9 @@ class ResolveDevicePtrOpPat LogicalResult matchAndRewrite(cudaq::cc::ResolveDevicePtrOp resolve, PatternRewriter &rewriter) const override { auto loc = resolve.getLoc(); - auto call = rewriter.create( - loc, TypeRange{cudaq::cc::PointerType::get(rewriter.getI8Type())}, + auto call = func::CallOp::create( + rewriter, loc, + TypeRange{cudaq::cc::PointerType::get(rewriter.getI8Type())}, cudaq::runtime::extractDevPtr, ValueRange{resolve.getDevicePtr()}); rewriter.replaceOpWithNewOp( resolve, resolve.getResult().getType(), call.getResult(0)); @@ -202,7 +203,7 @@ class DistributedDeviceCallPass patterns.add(ctx); patterns.insert(ctx, insertTrapImplementation); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) signalPassFailure(); return; } diff --git a/lib/Optimizer/Transforms/EraseNoise.cpp b/lib/Optimizer/Transforms/EraseNoise.cpp index d7f86771a66..746bb89bec9 100644 --- a/lib/Optimizer/Transforms/EraseNoise.cpp +++ b/lib/Optimizer/Transforms/EraseNoise.cpp @@ -47,7 +47,7 @@ class EraseNoisePass : public cudaq::opt::impl::EraseNoiseBase { auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if (failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After erasure:\n" << *op << "\n\n"); } diff --git a/lib/Optimizer/Transforms/EraseNopCalls.cpp b/lib/Optimizer/Transforms/EraseNopCalls.cpp index ef35056b056..d334bf75f5f 100644 --- a/lib/Optimizer/Transforms/EraseNopCalls.cpp +++ b/lib/Optimizer/Transforms/EraseNopCalls.cpp @@ -51,7 +51,7 @@ class EraseNopCallsPass auto *ctx = &getContext(); RewritePatternSet patterns(ctx); 
patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if (failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After erasure:\n" << *op << "\n\n"); } diff --git a/lib/Optimizer/Transforms/EraseVectorCopyCtor.cpp b/lib/Optimizer/Transforms/EraseVectorCopyCtor.cpp index f3daf62f7d1..e35c5709517 100644 --- a/lib/Optimizer/Transforms/EraseVectorCopyCtor.cpp +++ b/lib/Optimizer/Transforms/EraseVectorCopyCtor.cpp @@ -32,11 +32,11 @@ struct PatternAnalysis { // Transformation is: // // %36 = func.call @malloc(%35) : (i64) -> !cc.ptr -// func.call @llvm.memcpy.p0i8.p0i8.i64(%36, %34, %35, %false) : +// func.call @llvm.memcpy.p0.p0.i64(%36, %34, %35, %false) : // (!cc.ptr, !cc.ptr, i64, i1) -> () // %37 = cc.alloca i8[%35 : i64] // %38 = cc.cast %37 : (!cc.ptr>) -> !cc.ptr -// func.call @llvm.memcpy.p0i8.p0i8.i64(%38, %36, %35, %false) : +// func.call @llvm.memcpy.p0.p0.i64(%38, %36, %35, %false) : // (!cc.ptr, !cc.ptr, i64, i1) -> () // func.call @free(%36) : (!cc.ptr) -> () // ─────────────────────────────────────────────────────────────── @@ -68,11 +68,11 @@ class EraseVectorCopyCtorPattern : public OpRewritePattern { if (globalConst) { auto ip = rewriter.saveInsertionPoint(); rewriter.setInsertionPointAfter(analysis.copyFrom); - auto loaded = rewriter.create( - analysis.copyFrom.getLoc(), globalConst); + auto loaded = cudaq::cc::LoadOp::create( + rewriter, analysis.copyFrom.getLoc(), globalConst); rewriter.setInsertionPointAfter(analysis.copyTo); - rewriter.create(analysis.copyTo.getLoc(), loaded, - newStackSlot); + cudaq::cc::StoreOp::create(rewriter, analysis.copyTo.getLoc(), loaded, + newStackSlot); rewriter.restoreInsertionPoint(ip); } else { rewriter.replaceOpWithNewOp( @@ -126,7 +126,7 @@ class EraseVectorCopyCtorPass auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if 
(failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After erasure:\n" << *op << "\n\n"); } diff --git a/lib/Optimizer/Transforms/ExpandControlVeqs.cpp b/lib/Optimizer/Transforms/ExpandControlVeqs.cpp index 0548d181a38..ac227107651 100644 --- a/lib/Optimizer/Transforms/ExpandControlVeqs.cpp +++ b/lib/Optimizer/Transforms/ExpandControlVeqs.cpp @@ -7,7 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" @@ -54,7 +53,7 @@ class ExpandPat : public OpRewritePattern { // The veq is not added the newControls, so it will be dropped for (size_t i = 0; i < *size; ++i) { auto ext = - rewriter.create(op.getLoc(), veqVal, i); + quake::ExtractRefOp::create(rewriter, op.getLoc(), veqVal, i); newControls.push_back(ext); update = true; } diff --git a/lib/Optimizer/Transforms/ExpandMeasurements.cpp b/lib/Optimizer/Transforms/ExpandMeasurements.cpp index 17682471337..45117d5ee47 100644 --- a/lib/Optimizer/Transforms/ExpandMeasurements.cpp +++ b/lib/Optimizer/Transforms/ExpandMeasurements.cpp @@ -8,14 +8,17 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" +namespace cudaq::opt { +#define GEN_PASS_DEF_EXPANDMEASUREMENTS +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + using namespace mlir; // Only an individual qubit measurement returns a scalar token. Both @@ -42,63 +45,59 @@ class ExpandRewritePattern : public OpRewritePattern { // in. 
unsigned numQubits = 0u; for (auto v : measureOp.getTargets()) - if (v.getType().template isa()) + if (isa(v.getType())) ++numQubits; Value totalToRead = - rewriter.template create(loc, numQubits, 64); + arith::ConstantIntOp::create(rewriter, loc, numQubits, 64); auto i64Ty = rewriter.getI64Type(); for (auto v : measureOp.getTargets()) - if (v.getType().template isa()) { - Value vecSz = rewriter.template create(loc, i64Ty, v); - totalToRead = - rewriter.template create(loc, totalToRead, vecSz); + if (isa(v.getType())) { + Value vecSz = quake::VeqSizeOp::create(rewriter, loc, i64Ty, v); + totalToRead = arith::AddIOp::create(rewriter, loc, totalToRead, vecSz); } // 2. Create the buffer. auto i1Ty = rewriter.getI1Type(); auto i8Ty = rewriter.getI8Type(); - Value buff = - rewriter.template create(loc, i8Ty, totalToRead); + Value buff = cudaq::cc::AllocaOp::create(rewriter, loc, i8Ty, totalToRead); // 3. Measure each individual qubit and insert the result, in order, into // the buffer. For registers/vectors, loop over the entire set of qubits. 
- Value buffOff = rewriter.template create(loc, 0, 64); - Value one = rewriter.template create(loc, 1, 64); + Value buffOff = arith::ConstantIntOp::create(rewriter, loc, 0, 64); + Value one = arith::ConstantIntOp::create(rewriter, loc, 1, 64); auto measTy = quake::MeasureType::get(rewriter.getContext()); for (auto v : measureOp.getTargets()) { if (isa(v.getType())) { - auto meas = rewriter.template create(loc, measTy, v).getMeasOut(); - auto bit = - rewriter.template create(loc, i1Ty, meas); - Value addr = rewriter.template create( - loc, cudaq::cc::PointerType::get(i8Ty), buff, buffOff); - auto bitByte = rewriter.template create( - loc, i8Ty, bit, cudaq::cc::CastOpMode::Unsigned); - rewriter.template create(loc, bitByte, addr); - buffOff = rewriter.template create(loc, buffOff, one); + auto meas = A::create(rewriter, loc, measTy, v).getMeasOut(); + auto bit = quake::DiscriminateOp::create(rewriter, loc, i1Ty, meas); + Value addr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, cudaq::cc::PointerType::get(i8Ty), buff, buffOff); + auto bitByte = cudaq::cc::CastOp::create( + rewriter, loc, i8Ty, bit, cudaq::cc::CastOpMode::Unsigned); + cudaq::cc::StoreOp::create(rewriter, loc, bitByte, addr); + buffOff = arith::AddIOp::create(rewriter, loc, buffOff, one); } else { assert(isa(v.getType())); - Value vecSz = rewriter.template create(loc, i64Ty, v); + Value vecSz = quake::VeqSizeOp::create(rewriter, loc, i64Ty, v); cudaq::opt::factory::createInvariantLoop( rewriter, loc, vecSz, [&](OpBuilder &builder, Location loc, Region &, Block &block) { Value iv = block.getArgument(0); - Value qv = - builder.template create(loc, v, iv); - auto meas = builder.template create(loc, measTy, qv); - auto bit = builder.template create( - loc, i1Ty, meas.getMeasOut()); + Value qv = quake::ExtractRefOp::create(builder, loc, v, iv); + auto meas = A::create(builder, loc, measTy, qv); + auto bit = quake::DiscriminateOp::create(builder, loc, i1Ty, + meas.getMeasOut()); if (auto registerName = 
measureOp.getRegisterNameAttr()) meas.setRegisterName(registerName); - Value offset = - builder.template create(loc, iv, buffOff); - auto addr = builder.template create( - loc, cudaq::cc::PointerType::get(i8Ty), buff, offset); - auto bitByte = rewriter.template create( - loc, i8Ty, bit, cudaq::cc::CastOpMode::Unsigned); - builder.template create(loc, bitByte, addr); + Value offset = arith::AddIOp::create(builder, loc, iv, buffOff); + auto addr = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(i8Ty), buff, + offset); + auto bitByte = cudaq::cc::CastOp::create( + builder, loc, i8Ty, bit, cudaq::cc::CastOpMode::Unsigned); + cudaq::cc::StoreOp::create(builder, loc, bitByte, addr); }); - buffOff = rewriter.template create(loc, buffOff, vecSz); + buffOff = arith::AddIOp::create(rewriter, loc, buffOff, vecSz); } } @@ -110,7 +109,7 @@ class ExpandRewritePattern : public OpRewritePattern { auto ptrArrI1Ty = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(i1Ty)); auto buffCast = - rewriter.template create(loc, ptrArrI1Ty, buff); + cudaq::cc::CastOp::create(rewriter, loc, ptrArrI1Ty, buff); rewriter.template replaceOpWithNewOp( disc, stdvecTy, buffCast, totalToRead); } @@ -136,13 +135,13 @@ class ResetRewrite : public OpRewritePattern { auto loc = resetOp.getLoc(); auto veqArg = resetOp.getTargets(); auto i64Ty = rewriter.getI64Type(); - Value vecSz = rewriter.create(loc, i64Ty, veqArg); + Value vecSz = quake::VeqSizeOp::create(rewriter, loc, i64Ty, veqArg); cudaq::opt::factory::createInvariantLoop( rewriter, loc, vecSz, [&](OpBuilder &builder, Location loc, Region &, Block &block) { Value iv = block.getArgument(0); - Value qv = builder.create(loc, veqArg, iv); - builder.create(loc, TypeRange{}, qv); + Value qv = quake::ExtractRefOp::create(builder, loc, veqArg, iv); + quake::ResetOp::create(builder, loc, TypeRange{}, qv); }); rewriter.eraseOp(resetOp); return success(); @@ -150,8 +149,9 @@ class ResetRewrite : public OpRewritePattern { }; 
class ExpandMeasurementsPass - : public cudaq::opt::ExpandMeasurementsBase { + : public cudaq::opt::impl::ExpandMeasurementsBase { public: + using Base::Base; void runOnOperation() override { auto *op = getOperation(); auto *ctx = &getContext(); diff --git a/lib/Optimizer/Transforms/FactorQuantumAlloc.cpp b/lib/Optimizer/Transforms/FactorQuantumAlloc.cpp index bf82726a1a4..c1da88569e5 100644 --- a/lib/Optimizer/Transforms/FactorQuantumAlloc.cpp +++ b/lib/Optimizer/Transforms/FactorQuantumAlloc.cpp @@ -8,7 +8,6 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -95,10 +94,10 @@ class AllocaPattern : public OpRewritePattern { SmallVector memAllocs; for (auto memTy : stqTy.getMembers()) memAllocs.emplace_back( - rewriter.create(loc, memTy).getResult()); + quake::AllocaOp::create(rewriter, loc, memTy).getResult()); // 2. Create a value of the original struq type using quake.make_struq. auto aggregate = - rewriter.create(loc, stqTy, memAllocs); + quake::MakeStruqOp::create(rewriter, loc, stqTy, memAllocs); // 3. Walk all the uses. If they are quake.get_member operations, replace // them with direct uses. for (auto *user : llvm::make_early_inc_range(allocOp->getUsers())) @@ -119,7 +118,7 @@ class AllocaPattern : public OpRewritePattern { // Split the aggregate veq into a sequence of distinct alloca of ref. for (std::size_t i = 0; i < size; ++i) - newAllocs.emplace_back(rewriter.create(loc, refTy)); + newAllocs.emplace_back(quake::AllocaOp::create(rewriter, loc, refTy)); if (usesAreConvertible(allocOp)) { // Visit all users and replace them accordingly. 
@@ -150,7 +149,7 @@ class AllocaPattern : public OpRewritePattern { rewriter.setInsertionPoint(dealloc); auto deloc = dealloc.getLoc(); for (std::size_t i = 0; i < size - 1; ++i) - rewriter.create(deloc, newAllocs[i]); + quake::DeallocOp::create(rewriter, deloc, newAllocs[i]); rewriter.replaceOpWithNewOp(dealloc, newAllocs[size - 1]); continue; @@ -215,20 +214,17 @@ class DeallocPattern : public OpRewritePattern { } auto loc = dealloc.getLoc(); - // 1. Split the aggregate alloc into a sequence of distinct dealloc of - // ref. if (auto veqTy = dyn_cast(allocTy)) { generateDeallocs(veqTy, rewriter, loc, alloc); } else if (auto stqTy = dyn_cast(allocTy)) { - // Process a struq in memberwise fashion. for (auto iter : llvm::enumerate(stqTy.getMembers())) { Type memTy = iter.value(); - auto mem = rewriter.create(loc, memTy, alloc, - iter.index()); + auto mem = quake::GetMemberOp::create(rewriter, loc, memTy, alloc, + iter.index()); if (auto veqTy = dyn_cast(memTy)) generateDeallocs(veqTy, rewriter, loc, mem); else - rewriter.create(loc, mem); + quake::DeallocOp::create(rewriter, loc, mem); } } @@ -243,8 +239,8 @@ class DeallocPattern : public OpRewritePattern { std::size_t size = veqTy.getSize(); for (std::size_t i = 0; i < size; ++i) { - Value r = rewriter.create(loc, alloc, i); - rewriter.create(loc, r); + Value r = quake::ExtractRefOp::create(rewriter, loc, alloc, i); + quake::DeallocOp::create(rewriter, loc, r); } }; }; @@ -284,7 +280,7 @@ class FactorQuantumAllocationsPass func::FuncOp func = getOperation(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns)))) + if (failed(applyPatternsGreedily(func, std::move(patterns)))) return failure(); return success(); } @@ -294,7 +290,7 @@ class FactorQuantumAllocationsPass func::FuncOp func = getOperation(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns)))) + if 
(failed(applyPatternsGreedily(func, std::move(patterns)))) return failure(); return success(); } diff --git a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp index a6ce7e9dab2..ca5518185e6 100644 --- a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp +++ b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp @@ -99,11 +99,11 @@ class GenerateDeviceCodeLoaderPass auto funcOp = dyn_cast(op); if (!funcOp) continue; - if (!funcOp.getName().startswith(cudaq::runtime::cudaqGenPrefixName)) + if (!funcOp.getName().starts_with(cudaq::runtime::cudaqGenPrefixName)) continue; if (funcOp->hasAttr(cudaq::generatorAnnotation) || funcOp.empty()) continue; - if (funcOp.getName().endswith(".entry")) + if (funcOp.getName().ends_with(".entry")) continue; auto className = funcOp.getName().drop_front(cudaq::runtime::cudaqGenPrefixLength); @@ -160,36 +160,38 @@ class GenerateDeviceCodeLoaderPass strOut << *op << '\n'; strOut << "\n}\n" << '\0'; - auto devCode = builder.create( - loc, cudaq::opt::factory::getStringType(ctx, funcCode.size()), + auto devCode = LLVM::GlobalOp::create( + builder, loc, + cudaq::opt::factory::getStringType(ctx, funcCode.size()), /*isConstant=*/true, LLVM::Linkage::Private, className.str() + "CodeHolder.extract_device_code", builder.getStringAttr(funcCode), /*alignment=*/0); - auto devName = builder.create( - loc, cudaq::opt::factory::getStringType(ctx, className.size() + 1), + auto devName = LLVM::GlobalOp::create( + builder, loc, + cudaq::opt::factory::getStringType(ctx, className.size() + 1), /*isConstant=*/true, LLVM::Linkage::Private, className.str() + "CodeHolder.extract_device_name", builder.getStringAttr(className.str() + '\0'), /*alignment=*/0); - auto initFun = builder.create( - loc, className.str() + ".init_func", + auto initFun = LLVM::LLVMFuncOp::create( + builder, loc, className.str() + ".init_func", LLVM::LLVMFunctionType::get(cudaq::opt::factory::getVoidType(ctx), {})); auto insPt = 
builder.saveInsertionPoint(); - auto *initFunEntry = initFun.addEntryBlock(); + auto *initFunEntry = initFun.addEntryBlock(builder); builder.setInsertionPointToStart(initFunEntry); - auto devRef = builder.create( - loc, cudaq::opt::factory::getPointerType(devName.getType()), + auto devRef = LLVM::AddressOfOp::create( + builder, loc, cudaq::opt::factory::getPointerType(devName.getType()), devName.getSymName()); - auto codeRef = builder.create( - loc, cudaq::opt::factory::getPointerType(devCode.getType()), + auto codeRef = LLVM::AddressOfOp::create( + builder, loc, cudaq::opt::factory::getPointerType(devCode.getType()), devCode.getSymName()); - auto castDevRef = builder.create( - loc, cudaq::opt::factory::getPointerType(ctx), devRef); - auto castCodeRef = builder.create( - loc, cudaq::opt::factory::getPointerType(ctx), codeRef); - builder.create(loc, std::nullopt, - cudaq::runtime::deviceCodeHolderAdd, - ValueRange{castDevRef, castCodeRef}); + auto castDevRef = LLVM::BitcastOp::create( + builder, loc, cudaq::opt::factory::getPointerType(ctx), devRef); + auto castCodeRef = LLVM::BitcastOp::create( + builder, loc, cudaq::opt::factory::getPointerType(ctx), codeRef); + LLVM::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::deviceCodeHolderAdd, + ValueRange{castDevRef, castCodeRef}); auto kernName = funcOp.getSymName().str(); if (!jitTime && mangledNameMap && !mangledNameMap.empty() && @@ -198,10 +200,11 @@ class GenerateDeviceCodeLoaderPass auto getEntryRef = [&](auto kernName) -> Value { auto hostFuncNameAttr = mangledNameMap.getAs(kernName); auto hostFuncName = hostFuncNameAttr.getValue(); - if (hostFuncName.endswith("_PyKernelEntryPointRewrite")) { + if (hostFuncName.ends_with("_PyKernelEntryPointRewrite")) { // This is a Python module, so there is no kernel host entry point. 
- auto zero = builder.create(loc, 0, 64); - return builder.create(loc, ptrTy, zero); + auto zero = arith::ConstantIntOp::create( + builder, loc, builder.getIntegerType(64), 0); + return cudaq::cc::CastOp::create(builder, loc, ptrTy, zero); } auto hostFuncOp = module.lookupSymbol(hostFuncName); if (!hostFuncOp) { @@ -211,9 +214,10 @@ class GenerateDeviceCodeLoaderPass {}, module); hostFuncOp.setPrivate(); } - auto entryRef = builder.create( - loc, hostFuncOp.getFunctionType(), hostFuncOp.getSymName()); - return builder.create(loc, ptrTy, entryRef); + auto entryRef = func::ConstantOp::create(builder, loc, + hostFuncOp.getFunctionType(), + hostFuncOp.getSymName()); + return cudaq::cc::FuncToPtrOp::create(builder, loc, ptrTy, entryRef); }; auto castEntryRef = getEntryRef(kernName); @@ -223,27 +227,27 @@ class GenerateDeviceCodeLoaderPass auto nameTy = cudaq::opt::factory::getStringType(ctx, kernName.size() + 1); // The original kernel's name was already created. - auto devRef = builder.create( - loc, cudaq::opt::factory::getPointerType(nameTy), + auto devRef = LLVM::AddressOfOp::create( + builder, loc, cudaq::opt::factory::getPointerType(nameTy), kernName + "CodeHolder.extract_device_name"); - auto ccPtr = builder.create(loc, ptrTy, devRef); - builder.create(loc, std::nullopt, - cudaq::runtime::registerRunnableKernel, - ValueRange{ccPtr, castEntryRef}); + auto ccPtr = cudaq::cc::CastOp::create(builder, loc, ptrTy, devRef); + func::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::registerRunnableKernel, + ValueRange{ccPtr, castEntryRef}); } else { - auto deviceRef = builder.create( - loc, funcOp.getFunctionType(), funcOp.getSymName()); + auto deviceRef = func::ConstantOp::create( + builder, loc, funcOp.getFunctionType(), funcOp.getSymName()); auto castDeviceRef = - builder.create(loc, ptrTy, deviceRef); + cudaq::cc::FuncToPtrOp::create(builder, loc, ptrTy, deviceRef); auto castKernNameRef = - builder.create(loc, ptrTy, devRef); - builder.create( - loc, 
std::nullopt, cudaq::runtime::registerLinkableKernel, + cudaq::cc::CastOp::create(builder, loc, ptrTy, devRef); + func::CallOp::create( + builder, loc, TypeRange{}, cudaq::runtime::registerLinkableKernel, ValueRange{castEntryRef, castKernNameRef, castDeviceRef}); } } - builder.create(loc, ValueRange{}); + LLVM::ReturnOp::create(builder, loc, ValueRange{}); builder.restoreInsertionPoint(insPt); cudaq::opt::factory::createGlobalCtorCall( module, mlir::FlatSymbolRefAttr::get(ctx, initFun.getName())); diff --git a/lib/Optimizer/Transforms/GenKernelExecution.cpp b/lib/Optimizer/Transforms/GenKernelExecution.cpp index 377f16a24b5..14075be4b17 100644 --- a/lib/Optimizer/Transforms/GenKernelExecution.cpp +++ b/lib/Optimizer/Transforms/GenKernelExecution.cpp @@ -11,8 +11,6 @@ #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Marshal.h" #include "cudaq/Optimizer/Builder/Runtime.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "clang/Basic/Version.h" @@ -20,7 +18,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ToolOutputFile.h" -#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/IR/Diagnostics.h" #include "mlir/Transforms/Passes.h" #include @@ -58,7 +55,7 @@ zipArgumentsWithDeviceTypes(Location loc, OpBuilder &builder, ModuleOp module, if (!(cudaq::cc::isDynamicType(ty) || cudaq::opt::marshal::isStateType(ty) || isa(ty))) - v = builder.create(loc, v); + v = cudaq::cc::LoadOp::create(builder, loc, v); // Python will pass a std::vector to us here. Unpack it. auto pear = cudaq::opt::marshal::unpackAnyStdVectorBool( loc, builder, module, v, ty, heapTracker); @@ -98,18 +95,18 @@ zipArgumentsWithDeviceTypes(Location loc, OpBuilder &builder, ModuleOp module, // will match the memory layout of the small struct. 
auto pairTy = cudaq::cc::StructType::get( ctx, ArrayRef{first.getType(), second.getType()}); - auto tmp = builder.create(loc, pairTy); - auto tmp1 = builder.create( - loc, cudaq::cc::PointerType::get(first.getType()), tmp); - builder.create(loc, first, tmp1); - auto tmp2 = builder.create( - loc, cudaq::cc::PointerType::get(second.getType()), tmp, + auto tmp = cudaq::cc::AllocaOp::create(builder, loc, pairTy); + auto tmp1 = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(first.getType()), tmp); + cudaq::cc::StoreOp::create(builder, loc, first, tmp1); + auto tmp2 = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(second.getType()), tmp, ArrayRef{1}); - builder.create(loc, second, tmp2); + cudaq::cc::StoreOp::create(builder, loc, second, tmp2); auto devPtrTy = cudaq::cc::PointerType::get(devTy); - Value devVal = builder.create(loc, devPtrTy, tmp); + Value devVal = cudaq::cc::CastOp::create(builder, loc, devPtrTy, tmp); if (!cudaq::cc::isDynamicType(devTy)) - devVal = builder.create(loc, devVal); + devVal = cudaq::cc::LoadOp::create(builder, loc, devVal); result.emplace_back(argPos, devVal, devTy); continue; } @@ -118,7 +115,7 @@ zipArgumentsWithDeviceTypes(Location loc, OpBuilder &builder, ModuleOp module, if (isa(devTy) && isa((*argIter).getType()) && !cudaq::cc::isDynamicType(devTy)) { - Value devVal = builder.create(loc, *argIter); + Value devVal = cudaq::cc::LoadOp::create(builder, loc, *argIter); result.emplace_back(argPos, devVal, devTy); continue; } @@ -179,8 +176,8 @@ class GenerateKernelExecution // Create the function that we'll fill. 
auto funcType = FunctionType::get(ctx, {ptrPtrType, ptrPtrType}, {i64Ty}); - auto argsCreatorFunc = builder.create( - loc, classNameStr + ".argsCreator", funcType); + auto argsCreatorFunc = func::FuncOp::create( + builder, loc, classNameStr + ".argsCreator", funcType); OpBuilder::InsertionGuard guard(builder); auto *entry = argsCreatorFunc.addEntryBlock(); builder.setInsertionPointToStart(entry); @@ -193,23 +190,25 @@ class GenerateKernelExecution // bug in the code that is calling this argsCreator. // Get the array of void* args. - auto argsArray = builder.create( - loc, cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(ptrI8Ty)), + auto argsArray = cudaq::cc::CastOp::create( + builder, loc, + cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(ptrI8Ty)), entry->getArgument(0)); // Loop over the array and cast the void* to the host-side type. SmallVector pseudoArgs; for (auto iter : llvm::enumerate(passedHostArgTys)) { std::int32_t i = iter.index(); - auto parg = builder.create( - loc, ptrPtrType, argsArray, ArrayRef{i}); + auto parg = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrPtrType, argsArray, + ArrayRef{i}); Type ty = iter.value(); // parg is a pointer to a pointer as it is an element of an array of // pointers. Always dereference the first layer here. - Value deref = builder.create(loc, parg); + Value deref = cudaq::cc::LoadOp::create(builder, loc, parg); if (!isa(ty)) ty = cudaq::cc::PointerType::get(ty); - pseudoArgs.push_back(builder.create(loc, ty, deref)); + pseudoArgs.push_back(cudaq::cc::CastOp::create(builder, loc, ty, deref)); } // Zip the arguments with the device side argument types. 
Recall that some @@ -220,32 +219,33 @@ class GenerateKernelExecution cudaq::opt::marshal::createEmptyHeapTracker(loc, builder); auto zippy = zipArgumentsWithDeviceTypes( loc, builder, module, pseudoArgs, passedDevArgTys, heapTracker); - auto sizeScratch = builder.create(loc, i64Ty); + auto sizeScratch = cudaq::cc::AllocaOp::create(builder, loc, i64Ty); auto messageBufferSize = [&]() -> Value { if (hasDynamicSignature) return cudaq::opt::marshal::genSizeOfDynamicMessageBuffer( loc, builder, module, msgStructTy, zippy, sizeScratch); - return builder.create(loc, i64Ty, msgStructTy); + return cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, msgStructTy); }(); // Allocate the message buffer on the heap. It must outlive this call. - auto buff = builder.create(loc, ptrI8Ty, "malloc", - ValueRange(messageBufferSize)); + auto buff = func::CallOp::create(builder, loc, ptrI8Ty, "malloc", + ValueRange(messageBufferSize)); Value rawMessageBuffer = buff.getResult(0); Value msgBufferPrefix = - builder.create(loc, structPtrTy, rawMessageBuffer); + cudaq::cc::CastOp::create(builder, loc, structPtrTy, rawMessageBuffer); // Populate the message buffer with the pointer-free argument values. if (hasDynamicSignature) { - auto addendumScratch = builder.create(loc, ptrI8Ty); + auto addendumScratch = cudaq::cc::AllocaOp::create(builder, loc, ptrI8Ty); Value prefixSize = - builder.create(loc, i64Ty, msgStructTy); - auto arrMessageBuffer = builder.create( - loc, cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(i8Ty)), + cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, msgStructTy); + auto arrMessageBuffer = cudaq::cc::CastOp::create( + builder, loc, + cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(i8Ty)), rawMessageBuffer); // Compute the position of the addendum. 
- Value addendumPtr = builder.create( - loc, ptrI8Ty, arrMessageBuffer, + Value addendumPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrI8Ty, arrMessageBuffer, ArrayRef{prefixSize}); cudaq::opt::marshal::populateMessageBuffer(loc, builder, module, msgBufferPrefix, zippy, @@ -258,9 +258,9 @@ class GenerateKernelExecution cudaq::opt::marshal::maybeFreeHeapAllocations(loc, builder, heapTracker); // Return the message buffer and its size in bytes. - builder.create(loc, rawMessageBuffer, - entry->getArgument(1)); - builder.create(loc, ValueRange{messageBufferSize}); + cudaq::cc::StoreOp::create(builder, loc, rawMessageBuffer, + entry->getArgument(1)); + func::ReturnOp::create(builder, loc, ValueRange{messageBufferSize}); // Note: the .argsCreator will have allocated space for a static result in // the message buffer. If the kernel returns a dynamic result, the launch @@ -282,27 +282,27 @@ class GenerateKernelExecution auto *ctx = builder.getContext(); auto thunkTy = cudaq::opt::marshal::getThunkType(ctx); auto thunk = - builder.create(loc, classNameStr + ".thunk", thunkTy); + func::FuncOp::create(builder, loc, classNameStr + ".thunk", thunkTy); OpBuilder::InsertionGuard guard(builder); auto *thunkEntry = thunk.addEntryBlock(); builder.setInsertionPointToStart(thunkEntry); - auto castOp = builder.create(loc, structPtrTy, - thunkEntry->getArgument(0)); + auto castOp = cudaq::cc::CastOp::create(builder, loc, structPtrTy, + thunkEntry->getArgument(0)); auto isClientServer = thunkEntry->getArgument(1); auto i64Ty = builder.getI64Type(); // Compute the struct size without the trailing bytes, structSize. Value structSize = - builder.create(loc, i64Ty, structTy); + cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, structTy); // Compute location of trailing bytes. 
auto bufferPtrTy = cudaq::opt::factory::getIndexedObjectType(builder.getI8Type()); - Value extendedBuffer = builder.create( - loc, bufferPtrTy, thunkEntry->getArgument(0)); + Value extendedBuffer = cudaq::cc::CastOp::create( + builder, loc, bufferPtrTy, thunkEntry->getArgument(0)); auto ptrI8Ty = cudaq::cc::PointerType::get(builder.getI8Type()); - Value trailingData = builder.create( - loc, ptrI8Ty, extendedBuffer, structSize); + Value trailingData = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrI8Ty, extendedBuffer, structSize); // Unpack the arguments in the struct and build the argument list for // the call to the kernel code. @@ -310,7 +310,7 @@ class GenerateKernelExecution const std::int32_t offset = funcTy.getNumInputs(); if (positNullary) { for (auto inp : funcOp.getFunctionType().getInputs()) - args.push_back(builder.create(loc, inp)); + args.push_back(cudaq::cc::UndefOp::create(builder, loc, inp)); } else { for (auto inp : llvm::enumerate(funcTy.getInputs())) { auto [a, t] = cudaq::opt::marshal::processInputValue( @@ -320,12 +320,13 @@ class GenerateKernelExecution args.push_back(a); } } - auto call = builder.create( - loc, funcTy.getResults(), funcOp.getName(), args); + auto call = cudaq::cc::NoInlineCallOp::create( + builder, loc, funcTy.getResults(), funcOp.getName(), args, ArrayAttr(), + ArrayAttr()); // After the kernel call, clean up any `Array` allocations during kernel // executions. 
- builder.create(loc, std::nullopt, - cudaq::runtime::cleanupArrays, ValueRange{}); + func::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::cleanupArrays, ValueRange{}); const bool hasVectorResult = funcTy.getNumResults() == 1 && isa(funcTy.getResult(0)); @@ -341,16 +342,18 @@ class GenerateKernelExecution builder.setInsertionPointToEnd(currentBlock); auto eleTy = structTy.getMember(offset); auto memTy = cudaq::cc::PointerType::get(eleTy); - auto mem = builder.create( - loc, memTy, castOp, SmallVector{offset}); + auto mem = cudaq::cc::ComputePtrOp::create( + builder, loc, memTy, castOp, + SmallVector{offset}); auto resPtrTy = cudaq::cc::PointerType::get(call.getResult(0).getType()); - auto castMem = builder.create(loc, resPtrTy, mem); - builder.create(loc, call.getResult(0), castMem); - builder.create(loc, isClientServer, thenBlock, - elseBlock); + auto castMem = cudaq::cc::CastOp::create(builder, loc, resPtrTy, mem); + cudaq::cc::StoreOp::create(builder, loc, call.getResult(0), castMem); + cf::CondBranchOp::create(builder, loc, isClientServer, thenBlock, + elseBlock); builder.setInsertionPointToEnd(thenBlock); - auto resAsArg = builder.create( - loc, cudaq::cc::PointerType::get(thunkTy.getResults()[0]), mem); + auto resAsArg = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(thunkTy.getResults()[0]), + mem); auto retOffset = cudaq::opt::marshal::genComputeReturnOffset( loc, builder, funcTy, structTy); // createDynamicResult allocates a new buffer and packs the input values @@ -359,11 +362,11 @@ class GenerateKernelExecution // NB: This code only handles one dimensional vectors of static types. It // will have to be changed if there is a need to return recursively // dynamic structures, i.e., vectors of vectors. 
- auto res = builder.create( - loc, thunkTy.getResults()[0], "__nvqpp_createDynamicResult", + auto res = func::CallOp::create( + builder, loc, thunkTy.getResults()[0], "__nvqpp_createDynamicResult", ValueRange{thunkEntry->getArgument(0), structSize, resAsArg, retOffset}); - builder.create(loc, res.getResult(0)); + func::ReturnOp::create(builder, loc, res.getResult(0)); builder.setInsertionPointToEnd(elseBlock); // For the else case, the span was already copied to the block. } else { @@ -376,15 +379,15 @@ class GenerateKernelExecution o < static_cast(funcTy.getNumResults()); ++o) { auto eleTy = structTy.getMember(offset + o); auto memTy = cudaq::cc::PointerType::get(eleTy); - auto mem = builder.create( - loc, memTy, castOp, + auto mem = cudaq::cc::ComputePtrOp::create( + builder, loc, memTy, castOp, SmallVector{offset + o}); auto resTy = call.getResult(o).getType(); auto resPtrTy = cudaq::cc::PointerType::get(resTy); Value castMem = mem; if (resPtrTy != mem.getType()) - castMem = builder.create(loc, resPtrTy, mem); - builder.create(loc, call.getResult(o), castMem); + castMem = cudaq::cc::CastOp::create(builder, loc, resPtrTy, mem); + cudaq::cc::StoreOp::create(builder, loc, call.getResult(o), castMem); } } } @@ -392,9 +395,9 @@ class GenerateKernelExecution // that no messages need to be sent and that the CPU and QPU code share a // memory space. Therefore, making any copies can be skipped. 
auto zeroRes = - builder.create(loc, thunkTy.getResults()[0], - "__nvqpp_zeroDynamicResult", ValueRange{}); - builder.create(loc, zeroRes.getResult(0)); + func::CallOp::create(builder, loc, thunkTy.getResults()[0], + "__nvqpp_zeroDynamicResult", ValueRange{}); + func::ReturnOp::create(builder, loc, zeroRes.getResult(0)); return thunk; } @@ -430,12 +433,12 @@ class GenerateKernelExecution cudaq::opt::marshal::createEmptyHeapTracker(loc, builder); auto zippy = zipArgumentsWithDeviceTypes( loc, builder, module, blockValues, devFuncTy.getInputs(), heapTracker); - auto sizeScratch = builder.create(loc, i64Ty); + auto sizeScratch = cudaq::cc::AllocaOp::create(builder, loc, i64Ty); auto messageBufferSize = [&]() -> Value { if (hasDynamicSignature) return cudaq::opt::marshal::genSizeOfDynamicMessageBuffer( loc, builder, module, structTy, zippy, sizeScratch); - return builder.create(loc, i64Ty, structTy); + return cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, structTy); }(); Value msgBufferPrefix; @@ -445,17 +448,17 @@ class GenerateKernelExecution Value extendedStructSize; if (cudaq::opt::marshal::isCodegenPackedData(codegenKind)) { auto rawMessageBuffer = - builder.create(loc, i8Ty, messageBufferSize); - msgBufferPrefix = - builder.create(loc, structPtrTy, rawMessageBuffer); + cudaq::cc::AllocaOp::create(builder, loc, i8Ty, messageBufferSize); + msgBufferPrefix = cudaq::cc::CastOp::create(builder, loc, structPtrTy, + rawMessageBuffer); if (hasDynamicSignature) { auto addendumScratch = - builder.create(loc, ptrI8Ty); + cudaq::cc::AllocaOp::create(builder, loc, ptrI8Ty); Value prefixSize = - builder.create(loc, i64Ty, structTy); - Value addendumPtr = builder.create( - loc, ptrI8Ty, rawMessageBuffer, + cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, structTy); + Value addendumPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrI8Ty, rawMessageBuffer, ArrayRef{prefixSize}); cudaq::opt::marshal::populateMessageBuffer( loc, builder, module, msgBufferPrefix, zippy, 
addendumPtr, @@ -468,11 +471,11 @@ class GenerateKernelExecution cudaq::opt::marshal::maybeFreeHeapAllocations(loc, builder, heapTracker); extendedStructSize = messageBufferSize; Value loadThunk = - builder.create(loc, thunkTy, thunkFunc.getName()); + func::ConstantOp::create(builder, loc, thunkTy, thunkFunc.getName()); castLoadThunk = - builder.create(loc, ptrI8Ty, loadThunk); + cudaq::cc::FuncToPtrOp::create(builder, loc, ptrI8Ty, loadThunk); castTemp = - builder.create(loc, ptrI8Ty, msgBufferPrefix); + cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, msgBufferPrefix); resultOffset = cudaq::opt::marshal::genComputeReturnOffset( loc, builder, devFuncTy, structTy); } @@ -481,25 +484,26 @@ class GenerateKernelExecution if (cudaq::opt::marshal::isCodegenArgumentGather(codegenKind)) { // 1) Allocate and initialize a std::vector object. const unsigned count = devFuncTy.getInputs().size(); - auto stdVec = builder.create( - loc, cudaq::opt::factory::stlVectorType(ptrI8Ty)); + auto stdVec = cudaq::cc::AllocaOp::create( + builder, loc, cudaq::opt::factory::stlVectorType(ptrI8Ty)); auto arrPtrTy = cudaq::cc::ArrayType::get(ctx, ptrI8Ty, count); - Value buffer = builder.create(loc, arrPtrTy); - auto buffSize = builder.create(loc, i64Ty, arrPtrTy); + Value buffer = cudaq::cc::AllocaOp::create(builder, loc, arrPtrTy); + auto buffSize = + cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, arrPtrTy); auto ptrPtrTy = cudaq::cc::PointerType::get(ptrI8Ty); - auto cast1 = builder.create(loc, ptrPtrTy, buffer); + auto cast1 = cudaq::cc::CastOp::create(builder, loc, ptrPtrTy, buffer); auto ptr3Ty = cudaq::cc::PointerType::get(ptrPtrTy); - auto stdVec0 = builder.create(loc, ptr3Ty, stdVec); - builder.create(loc, cast1, stdVec0); - auto cast2 = builder.create(loc, i64Ty, buffer); - auto endBuff = builder.create(loc, cast2, buffSize); - auto cast3 = builder.create(loc, ptrPtrTy, endBuff); - auto stdVec1 = builder.create( - loc, ptr3Ty, stdVec, ArrayRef{1}); - builder.create(loc, cast3, 
stdVec1); - auto stdVec2 = builder.create( - loc, ptr3Ty, stdVec, ArrayRef{2}); - builder.create(loc, cast3, stdVec2); + auto stdVec0 = cudaq::cc::CastOp::create(builder, loc, ptr3Ty, stdVec); + cudaq::cc::StoreOp::create(builder, loc, cast1, stdVec0); + auto cast2 = cudaq::cc::CastOp::create(builder, loc, i64Ty, buffer); + auto endBuff = arith::AddIOp::create(builder, loc, cast2, buffSize); + auto cast3 = cudaq::cc::CastOp::create(builder, loc, ptrPtrTy, endBuff); + auto stdVec1 = cudaq::cc::ComputePtrOp::create( + builder, loc, ptr3Ty, stdVec, ArrayRef{1}); + cudaq::cc::StoreOp::create(builder, loc, cast3, stdVec1); + auto stdVec2 = cudaq::cc::ComputePtrOp::create( + builder, loc, ptr3Ty, stdVec, ArrayRef{2}); + cudaq::cc::StoreOp::create(builder, loc, cast3, stdVec2); // 2) Iterate over the arguments passed in and populate the vector. SmallVector blockArgs{ @@ -508,12 +512,13 @@ class GenerateKernelExecution unsigned j = 0; for (std::int32_t i = 0, N = blockArgs.size(); i < N; ++i, ++j) { auto blkArg = blockArgs[i]; - auto pos = builder.create( - loc, ptrPtrTy, buffer, ArrayRef{i}); + auto pos = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrPtrTy, buffer, + ArrayRef{i}); if (isa(blkArg.getType())) { auto castArg = - builder.create(loc, ptrI8Ty, blkArg); - builder.create(loc, castArg, pos); + cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, blkArg); + cudaq::cc::StoreOp::create(builder, loc, castArg, pos); continue; } Value temp; @@ -522,39 +527,41 @@ class GenerateKernelExecution cudaq::opt::factory::structUsesTwoArguments( devFuncTy.getInput(j))) { temp = - builder.create(loc, devFuncTy.getInput(j)); - auto part1 = builder.create( - loc, cudaq::cc::PointerType::get(blkArg.getType()), temp); - builder.create(loc, blkArg, part1); + cudaq::cc::AllocaOp::create(builder, loc, devFuncTy.getInput(j)); + auto part1 = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(blkArg.getType()), + temp); + cudaq::cc::StoreOp::create(builder, loc, 
blkArg, part1); auto blkArg2 = blockArgs[++i]; - auto cast2 = builder.create( - loc, + auto cast2 = cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get( cudaq::cc::ArrayType::get(blkArg2.getType())), temp); - auto part2 = builder.create( - loc, cudaq::cc::PointerType::get(blkArg2.getType()), cast2, - ArrayRef{1}); - builder.create(loc, blkArg2, part2); + auto part2 = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(blkArg2.getType()), + cast2, ArrayRef{1}); + cudaq::cc::StoreOp::create(builder, loc, blkArg2, part2); } else if (isa(blkArg.getType())) { // In C++, callables are already resolved. There is nothing to pass. - temp = builder.create(loc, 0, 64); + temp = arith::ConstantIntOp::create(builder, loc, 0, 64); } else { - temp = builder.create(loc, blkArg.getType()); - builder.create(loc, blkArg, temp); + temp = cudaq::cc::AllocaOp::create(builder, loc, blkArg.getType()); + cudaq::cc::StoreOp::create(builder, loc, blkArg, temp); } - auto castTemp = builder.create(loc, ptrI8Ty, temp); - builder.create(loc, castTemp, pos); + auto castTemp = cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, temp); + cudaq::cc::StoreOp::create(builder, loc, castTemp, pos); } - vecArgPtrs = builder.create(loc, ptrI8Ty, stdVec); + vecArgPtrs = cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, stdVec); } // Prepare to call the `launchKernel` runtime library entry point. 
- Value loadKernName = builder.create( - loc, cudaq::opt::factory::getPointerType(kernelNameObj.getType()), + Value loadKernName = LLVM::AddressOfOp::create( + builder, loc, + cudaq::opt::factory::getPointerType(kernelNameObj.getType()), kernelNameObj.getSymName()); auto castLoadKernName = - builder.create(loc, ptrI8Ty, loadKernName); + cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, loadKernName); auto hostFuncTy = hostFunc.getFunctionType(); assert((hostFuncTy.getResults().empty() || @@ -570,13 +577,13 @@ class GenerateKernelExecution return; Type res0Ty = structTy.getMember(offset); auto ptrResTy = cudaq::cc::PointerType::get(res0Ty); - auto rptr = builder.create(loc, ptrI8Ty, - spanReturned, 0); + auto rptr = cudaq::cc::ExtractValueOp::create(builder, loc, ptrI8Ty, + spanReturned, 0); launchResultToFree = rptr; - auto rIntPtr = builder.create(loc, i64Ty, rptr); - auto zero = builder.create(loc, 0, 64); - auto cmp = builder.create(loc, arith::CmpIPredicate::ne, - rIntPtr, zero); + auto rIntPtr = cudaq::cc::CastOp::create(builder, loc, i64Ty, rptr); + auto zero = arith::ConstantIntOp::create(builder, loc, 0, 64); + auto cmp = arith::CmpIOp::create(builder, loc, arith::CmpIPredicate::ne, + rIntPtr, zero); auto *currentBlock = builder.getBlock(); auto *reg = currentBlock->getParent(); auto *thenBlock = builder.createBlock(reg); @@ -584,22 +591,22 @@ class GenerateKernelExecution auto *endifBlock = builder.createBlock( reg, reg->end(), TypeRange{ptrResTy}, SmallVector(1, loc)); builder.setInsertionPointToEnd(currentBlock); - builder.create(loc, cmp, thenBlock, elseBlock); + cf::CondBranchOp::create(builder, loc, cmp, thenBlock, elseBlock); builder.setInsertionPointToEnd(thenBlock); // dynamic result was returned. // We need to free() this buffer before the end of this function. 
auto rStructPtr = - builder.create(loc, structPtrTy, rptr); - Value lRes = builder.create( - loc, ptrResTy, rStructPtr, + cudaq::cc::CastOp::create(builder, loc, structPtrTy, rptr); + Value lRes = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrResTy, rStructPtr, ArrayRef{offset}); - builder.create(loc, endifBlock, ArrayRef{lRes}); + cf::BranchOp::create(builder, loc, endifBlock, ArrayRef{lRes}); builder.setInsertionPointToEnd(elseBlock); // span was returned in the original buffer. - Value mRes = builder.create( - loc, ptrResTy, msgBufferPrefix, + Value mRes = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrResTy, msgBufferPrefix, ArrayRef{offset}); - builder.create(loc, endifBlock, ArrayRef{mRes}); + cf::BranchOp::create(builder, loc, endifBlock, ArrayRef{mRes}); builder.setInsertionPointToEnd(endifBlock); launchResult = endifBlock->getArgument(0); }; @@ -608,8 +615,8 @@ class GenerateKernelExecution switch (codegenKind) { case 0: { assert(vecArgPtrs && castLoadThunk); - auto launch = builder.create( - loc, cudaq::opt::factory::getDynamicBufferType(ctx), + auto launch = func::CallOp::create( + builder, loc, cudaq::opt::factory::getDynamicBufferType(ctx), cudaq::runtime::launchKernelHybridFuncName, ArrayRef{castLoadKernName, castLoadThunk, castTemp, extendedStructSize, resultOffset, vecArgPtrs}); @@ -617,8 +624,8 @@ class GenerateKernelExecution } break; case 1: { assert(!vecArgPtrs && castLoadThunk); - auto launch = builder.create( - loc, cudaq::opt::factory::getDynamicBufferType(ctx), + auto launch = func::CallOp::create( + builder, loc, cudaq::opt::factory::getDynamicBufferType(ctx), cudaq::runtime::launchKernelFuncName, ArrayRef{castLoadKernName, castLoadThunk, castTemp, extendedStructSize, resultOffset}); @@ -626,16 +633,16 @@ class GenerateKernelExecution } break; case 2: { assert(vecArgPtrs && !castLoadThunk); - builder.create( - loc, std::nullopt, cudaq::runtime::launchKernelStreamlinedFuncName, - ArrayRef{castLoadKernName, vecArgPtrs}); + 
func::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::launchKernelStreamlinedFuncName, + ArrayRef{castLoadKernName, vecArgPtrs}); // For this codegen kind, we drop any results on the floor and return // random data in registers and/or off the stack. This maintains parity // with any pre-existing kernel launchers. SmallVector garbage; for (auto ty : hostFunc.getFunctionType().getResults()) - garbage.push_back(builder.create(loc, ty)); - builder.create(loc, garbage); + garbage.push_back(cudaq::cc::UndefOp::create(builder, loc, ty)); + func::ReturnOp::create(builder, loc, garbage); return; } default: @@ -654,16 +661,16 @@ class GenerateKernelExecution // reference. if (resultVal) { // Static values. std::vector are necessarily sret, see below. - auto resPtr = builder.create( - loc, ptrResTy, msgBufferPrefix, + auto resPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrResTy, msgBufferPrefix, ArrayRef{offset}); Type castToTy = cudaq::cc::PointerType::get(hostFuncTy.getResult(0)); auto castResPtr = [&]() -> Value { if (castToTy == ptrResTy) return resPtr; - return builder.create(loc, castToTy, resPtr); + return cudaq::cc::CastOp::create(builder, loc, castToTy, resPtr); }(); - results.push_back(builder.create(loc, castResPtr)); + results.push_back(cudaq::cc::LoadOp::create(builder, loc, castResPtr)); } else { // This is an sret return. Check if device is returning a span. If it // is, then we will need to convert it to a std::vector here. 
The vector @@ -673,51 +680,52 @@ class GenerateKernelExecution dyn_cast(devFuncTy.getResult(0))) { auto eleTy = spanTy.getElementType(); auto ptrTy = cudaq::cc::PointerType::get(eleTy); - auto gep0 = builder.create( - loc, cudaq::cc::PointerType::get(ptrTy), launchResult, + auto gep0 = cudaq::cc::ComputePtrOp::create( + builder, loc, cudaq::cc::PointerType::get(ptrTy), launchResult, SmallVector{0}); - auto dataPtr = builder.create(loc, gep0); + auto dataPtr = cudaq::cc::LoadOp::create(builder, loc, gep0); auto lenPtrTy = cudaq::cc::PointerType::get(i64Ty); - auto gep1 = builder.create( - loc, lenPtrTy, launchResult, + auto gep1 = cudaq::cc::ComputePtrOp::create( + builder, loc, lenPtrTy, launchResult, SmallVector{1}); - auto vecLen = builder.create(loc, gep1); + auto vecLen = cudaq::cc::LoadOp::create(builder, loc, gep1); if (spanTy.getElementType() == builder.getI1Type()) { cudaq::opt::marshal::genStdvecBoolFromInitList(loc, builder, arg0, dataPtr, vecLen); } else { Value tSize = - builder.create(loc, i64Ty, eleTy); + cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, eleTy); cudaq::opt::marshal::genStdvecTFromInitList(loc, builder, arg0, dataPtr, tSize, vecLen); } // free(nullptr) is defined to be a nop in the standard. - builder.create(loc, std::nullopt, "free", - ArrayRef{launchResultToFree}); + func::CallOp::create(builder, loc, TypeRange{}, "free", + ArrayRef{launchResultToFree}); } else { // Otherwise, we can just copy the aggregate into the sret memory // block. Uses the size of the host function's sret pointer element // type for the memcpy, so the device should return an (aggregate) // value of suitable size. 
- auto resPtr = builder.create( - loc, ptrResTy, msgBufferPrefix, + auto resPtr = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrResTy, msgBufferPrefix, ArrayRef{offset}); auto castMsgBuff = - builder.create(loc, ptrI8Ty, resPtr); + cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, resPtr); Type eleTy = cast(arg0.getType()).getElementType(); - Value bytes = builder.create(loc, i64Ty, eleTy); - auto notVolatile = builder.create(loc, 0, 1); - auto castArg0 = builder.create(loc, ptrI8Ty, arg0); - builder.create( - loc, std::nullopt, cudaq::llvmMemCopyIntrinsic, + Value bytes = cudaq::cc::SizeOfOp::create(builder, loc, i64Ty, eleTy); + auto notVolatile = arith::ConstantIntOp::create(builder, loc, 0, 1); + auto castArg0 = + cudaq::cc::CastOp::create(builder, loc, ptrI8Ty, arg0); + func::CallOp::create( + builder, loc, TypeRange{}, cudaq::llvmMemCopyIntrinsic, ValueRange{castArg0, castMsgBuff, bytes, notVolatile}); } } } // Return the result (if any). - builder.create(loc, results); + func::ReturnOp::create(builder, loc, results); } /// Generate a function to be executed at load-time which will register the @@ -729,32 +737,34 @@ class GenerateKernelExecution auto module = getOperation(); auto *ctx = builder.getContext(); auto ptrType = cudaq::cc::PointerType::get(builder.getI8Type()); - auto initFun = builder.create( - loc, classNameStr + ".kernelRegFunc", + auto initFun = LLVM::LLVMFuncOp::create( + builder, loc, classNameStr + ".kernelRegFunc", LLVM::LLVMFunctionType::get(cudaq::opt::factory::getVoidType(ctx), {})); OpBuilder::InsertionGuard guard(builder); - auto *initFunEntry = initFun.addEntryBlock(); + auto *initFunEntry = initFun.addEntryBlock(builder); builder.setInsertionPointToStart(initFunEntry); - auto kernRef = builder.create( - loc, cudaq::opt::factory::getPointerType(kernelNameObj.getType()), + auto kernRef = LLVM::AddressOfOp::create( + builder, loc, + cudaq::opt::factory::getPointerType(kernelNameObj.getType()), kernelNameObj.getSymName()); - auto 
castKernRef = builder.create(loc, ptrType, kernRef); - builder.create(loc, std::nullopt, - cudaq::runtime::CudaqRegisterKernelName, - ValueRange{castKernRef}); + auto castKernRef = + cudaq::cc::CastOp::create(builder, loc, ptrType, kernRef); + func::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::CudaqRegisterKernelName, + ValueRange{castKernRef}); if (cudaq::opt::marshal::isCodegenPackedData(codegenKind)) { // Register the argsCreator too auto ptrPtrType = cudaq::cc::PointerType::get(ptrType); auto argsCreatorFuncType = FunctionType::get( ctx, {ptrPtrType, ptrPtrType}, {builder.getI64Type()}); - Value loadArgsCreator = builder.create( - loc, argsCreatorFuncType, argsCreatorFunc.getName()); - auto castLoadArgsCreator = - builder.create(loc, ptrType, loadArgsCreator); - builder.create( - loc, std::nullopt, cudaq::runtime::CudaqRegisterArgsCreator, - ValueRange{castKernRef, castLoadArgsCreator}); + Value loadArgsCreator = func::ConstantOp::create( + builder, loc, argsCreatorFuncType, argsCreatorFunc.getName()); + auto castLoadArgsCreator = cudaq::cc::FuncToPtrOp::create( + builder, loc, ptrType, loadArgsCreator); + func::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::CudaqRegisterArgsCreator, + ValueRange{castKernRef, castLoadArgsCreator}); } // Check if this is a lambda mangled name @@ -771,29 +781,31 @@ class GenerateKernelExecution // Create this global name, it is unique for any lambda // bc classNameStr contains the parentFunc + varName - auto lambdaName = builder.create( - loc, + auto lambdaName = LLVM::GlobalOp::create( + builder, loc, cudaq::opt::factory::getStringType(ctx, demangledName.size() + 1), /*isConstant=*/true, LLVM::Linkage::External, classNameStr + ".lambdaName", builder.getStringAttr(demangledName + '\0'), /*alignment=*/0); builder.restoreInsertionPoint(insertPoint); - auto lambdaRef = builder.create( - loc, cudaq::opt::factory::getPointerType(lambdaName.getType()), + auto lambdaRef = LLVM::AddressOfOp::create( + builder, 
loc, + cudaq::opt::factory::getPointerType(lambdaName.getType()), lambdaName.getSymName()); - auto castLambdaRef = builder.create( - loc, cudaq::opt::factory::getPointerType(ctx), lambdaRef); - auto castKernelRef = builder.create( - loc, cudaq::opt::factory::getPointerType(ctx), castKernRef); - builder.create(loc, std::nullopt, - cudaq::runtime::CudaqRegisterLambdaName, - ValueRange{castLambdaRef, castKernelRef}); + auto castLambdaRef = cudaq::cc::CastOp::create( + builder, loc, cudaq::opt::factory::getPointerType(ctx), lambdaRef); + auto castKernelRef = cudaq::cc::CastOp::create( + builder, loc, cudaq::opt::factory::getPointerType(ctx), + castKernRef); + LLVM::CallOp::create(builder, loc, TypeRange{}, + cudaq::runtime::CudaqRegisterLambdaName, + ValueRange{castLambdaRef, castKernelRef}); } } - builder.create(loc, ValueRange{}); + LLVM::ReturnOp::create(builder, loc, ValueRange{}); return initFun; } @@ -915,7 +927,7 @@ class GenerateKernelExecution SmallVector workList; for (auto &op : *module.getBody()) if (auto funcOp = dyn_cast(op)) - if (funcOp.getName().startswith(cudaq::runtime::cudaqGenPrefixName) && + if (funcOp.getName().starts_with(cudaq::runtime::cudaqGenPrefixName) && cudaq::opt::marshal::hasLegalType(funcOp.getFunctionType()) && !funcOp.empty() && !funcOp->hasAttr(cudaq::generatorAnnotation)) workList.push_back(funcOp); @@ -936,7 +948,7 @@ class GenerateKernelExecution { // Create the run kernel and drop the return result on the floor. 
auto runKern = - builder.create(loc, runKernName, runKernTy); + func::FuncOp::create(builder, loc, runKernName, runKernTy); auto unitAttr = builder.getUnitAttr(); runKern->setAttr(cudaq::entryPointAttrName, unitAttr); runKern->setAttr(cudaq::kernelAttrName, unitAttr); @@ -949,11 +961,11 @@ class GenerateKernelExecution OpBuilder::InsertionGuard guard(builder); Block *entry = runKern.addEntryBlock(); builder.setInsertionPointToStart(entry); - auto kern = builder.create( - loc, epKern.getFunctionType().getResults(), epKern.getName(), - entry->getArguments()); - builder.create(loc, kern.getResults()); - builder.create(loc); + auto kern = func::CallOp::create( + builder, loc, epKern.getFunctionType().getResults(), + epKern.getName(), entry->getArguments()); + cudaq::cc::LogOutputOp::create(builder, loc, kern.getResults()); + func::ReturnOp::create(builder, loc); runKernels.push_back(runKern); } { @@ -973,8 +985,8 @@ class GenerateKernelExecution runKernTy, /*hasThisPointer=*/false, module); runEntryKernTy = FunctionType::get(ctx, runEntryKernTy.getInputs(), {}); - auto runEntryKern = builder.create( - loc, runKernEntryName, runEntryKernTy); + auto runEntryKern = func::FuncOp::create( + builder, loc, runKernEntryName, runEntryKernTy); auto origEntryFunc = [&]() -> func::FuncOp { auto mangledNameMap = module->getAttrOfType( cudaq::runtime::mangledNameMap); @@ -989,7 +1001,7 @@ class GenerateKernelExecution OpBuilder::InsertionGuard guard(builder); Block *entry = runEntryKern.addEntryBlock(); builder.setInsertionPointToStart(entry); - builder.create(loc); + func::ReturnOp::create(builder, loc); // Append this to the kernel name map. auto dict = module->getAttrOfType( cudaq::runtime::mangledNameMap); @@ -1022,8 +1034,9 @@ class GenerateKernelExecution auto classNameStr = className.str(); // Create a constant with the name of the kernel as a C string. 
- auto kernelNameObj = builder.create( - loc, cudaq::opt::factory::getStringType(ctx, className.size() + 1), + auto kernelNameObj = LLVM::GlobalOp::create( + builder, loc, + cudaq::opt::factory::getStringType(ctx, className.size() + 1), /*isConstant=*/true, LLVM::Linkage::External, classNameStr + ".kernelName", builder.getStringAttr(classNameStr + '\0'), /*alignment=*/0); diff --git a/lib/Optimizer/Transforms/GetConcreteMatrix.cpp b/lib/Optimizer/Transforms/GetConcreteMatrix.cpp index d36b26fef14..7b64cfda9fe 100644 --- a/lib/Optimizer/Transforms/GetConcreteMatrix.cpp +++ b/lib/Optimizer/Transforms/GetConcreteMatrix.cpp @@ -7,8 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -84,8 +82,7 @@ class GetConcreteMatrixPass auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed( - applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) + if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) signalPassFailure(); } }; diff --git a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp index 383da3b5eb0..d805931bc22 100644 --- a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp +++ b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp @@ -8,10 +8,7 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -169,8 +166,9 @@ struct ConstantArrayPattern return 
failure(); auto loc = conarr.getLoc(); if (!extracts.empty()) { - auto base = rewriter.create( - loc, cudaq::cc::PointerType::get(conarr.getType()), globalName); + auto base = cudaq::cc::AddressOfOp::create( + rewriter, loc, cudaq::cc::PointerType::get(conarr.getType()), + globalName); auto elePtrTy = cudaq::cc::PointerType::get(eleTy); for (auto extract : extracts) { SmallVector args; @@ -183,8 +181,8 @@ struct ConstantArrayPattern } OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(extract); - auto addrVal = - rewriter.create(loc, elePtrTy, base, args); + auto addrVal = cudaq::cc::ComputePtrOp::create(rewriter, loc, elePtrTy, + base, args); rewriter.replaceOpWithNewOp(extract, addrVal); } } @@ -196,8 +194,9 @@ struct ConstantArrayPattern rewriter.eraseOp(store); } if (loadAsValue) { - auto base = rewriter.create( - loc, cudaq::cc::PointerType::get(conarr.getType()), globalName); + auto base = cudaq::cc::AddressOfOp::create( + rewriter, loc, cudaq::cc::PointerType::get(conarr.getType()), + globalName); rewriter.replaceOpWithNewOp(conarr, base); } return success(); @@ -229,10 +228,10 @@ struct ReifySpanPattern : public OpRewritePattern { auto loc = reify.getLoc(); auto eleTy = cast(reify.getType()).getElementType(); - auto numEle = rewriter.create( - loc, conArr.getConstantValues().size(), 64); - Value buff = rewriter.create(loc, eleTy, numEle); - rewriter.create(loc, conArr, buff); + auto numEle = arith::ConstantIntOp::create( + rewriter, loc, conArr.getConstantValues().size(), 64); + Value buff = cudaq::cc::AllocaOp::create(rewriter, loc, eleTy, numEle); + cudaq::cc::StoreOp::create(rewriter, loc, conArr, buff); rewriter.replaceOpWithNewOp( reify, reify.getType(), buff, numEle); return success(); @@ -261,26 +260,26 @@ struct ReifySpanPattern : public OpRewritePattern { std::int64_t len = stringAttr.getValue().size() + 1; Type litTy = cudaq::cc::PointerType::get( cudaq::cc::ArrayType::get(ctx, rewriter.getI8Type(), len)); - auto strLit = 
rewriter.create( - loc, litTy, stringAttr); - auto size = rewriter.create(loc, len, 64); - members.push_back(rewriter.create( - loc, cudaq::cc::CharspanType::get(ctx), strLit, size)); + auto strLit = cudaq::cc::CreateStringLiteralOp::create( + rewriter, loc, litTy, stringAttr); + auto size = arith::ConstantIntOp::create(rewriter, loc, len, 64); + members.push_back(cudaq::cc::StdvecInitOp::create( + rewriter, loc, cudaq::cc::CharspanType::get(ctx), strLit, size)); } else if (auto a = dyn_cast(attr)) { if (auto floatTy = dyn_cast(eleTy)) { APFloat floatVal(floatTy.getFloatSemantics(), a.getValue()); auto floatAttr = FloatAttr::get(floatTy, floatVal); members.push_back( - rewriter.create(loc, floatAttr, floatTy)); + arith::ConstantOp::create(rewriter, loc, floatTy, floatAttr)); } else { - members.push_back(rewriter.create(loc, a, eleTy)); + members.push_back(arith::ConstantOp::create(rewriter, loc, eleTy, a)); } } else if (auto a = dyn_cast(attr)) { - members.push_back(rewriter.create(loc, a, eleTy)); + members.push_back(arith::ConstantOp::create(rewriter, loc, eleTy, a)); } else { // Unexpected attribute. 
LLVM_DEBUG(llvm::dbgs() << "unexpected attribute: " << attr << '\n'); - members.push_back(rewriter.create(loc, eleTy)); + members.push_back(cudaq::cc::PoisonOp::create(rewriter, loc, eleTy)); } } @@ -294,22 +293,24 @@ struct ReifySpanPattern : public OpRewritePattern { } } - auto size = rewriter.create(loc, members.size(), 64); - auto buff = rewriter.create(loc, eleTy, size); + auto size = arith::ConstantIntOp::create(rewriter, loc, members.size(), 64); + auto buff = cudaq::cc::AllocaOp::create(rewriter, loc, eleTy, size); for (auto iter : llvm::enumerate(members)) { std::int32_t idx = iter.index(); auto m = iter.value(); if (hasBoolElems) { auto unit = UnitAttr::get(rewriter.getContext()); - m = rewriter.create(loc, eleTy, m, UnitAttr(), unit); + m = cudaq::cc::CastOp::create(rewriter, loc, eleTy, m, UnitAttr(), + unit); } auto ptrEleTy = cudaq::cc::PointerType::get(eleTy); - auto ptr = rewriter.create( - loc, ptrEleTy, buff, ArrayRef{idx}); - rewriter.create(loc, m, ptr); + auto ptr = cudaq::cc::ComputePtrOp::create( + rewriter, loc, ptrEleTy, buff, + ArrayRef{idx}); + cudaq::cc::StoreOp::create(rewriter, loc, m, ptr); } Value result = - rewriter.create(loc, ty, buff, size); + cudaq::cc::StdvecInitOp::create(rewriter, loc, ty, buff, size); return result; } @@ -338,7 +339,7 @@ class GlobalizeArrayValuesPass counter); LLVM_DEBUG(llvm::dbgs() << "Before globalizing array values:\n" << module << '\n'); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) { + if (failed(applyPatternsGreedily(module, std::move(patterns)))) { signalPassFailure(); return; } diff --git a/lib/Optimizer/Transforms/LambdaLifting.cpp b/lib/Optimizer/Transforms/LambdaLifting.cpp index d62e34c079c..930e1a8ff07 100644 --- a/lib/Optimizer/Transforms/LambdaLifting.cpp +++ b/lib/Optimizer/Transforms/LambdaLifting.cpp @@ -8,14 +8,10 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" -#include "cudaq/Optimizer/Dialect/CC/CCDialect.h" -#include 
"cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/IRMapping.h" -#include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -191,24 +187,25 @@ struct CreateLambdaOpPattern argTys.push_back(lambdaTy); argTys.append(sig.getInputs().begin(), sig.getInputs().end()); auto funTy = FunctionType::get(ctx, argTys, sig.getResults()); - auto thunk = rewriter.create( - loc, getThunkLambdaName(counter), funTy, emptyDict); + auto thunk = func::FuncOp::create( + rewriter, loc, getThunkLambdaName(counter), funTy, emptyDict); thunk.setPrivate(); thunk->setAttr(cudaq::kernelAttrName, rewriter.getUnitAttr()); auto *entry = thunk.addEntryBlock(); rewriter.setInsertionPointToEnd(entry); SmallVector callableArgs; if (!freeValues.empty()) { - auto closureData = rewriter.create( - loc, freeValues.getTypes(), thunk.getArgument(0)); + auto closureData = cudaq::cc::CallableClosureOp::create( + rewriter, loc, freeValues.getTypes(), thunk.getArgument(0)); callableArgs.append(closureData.getResults().begin(), closureData.getResults().end()); } callableArgs.append(thunk.getArguments().begin() + 1, thunk.getArguments().end()); - auto result = rewriter.create( - loc, sig.getResults(), getLiftedLambdaName(counter), callableArgs); - rewriter.create(loc, result.getResults()); + auto result = + func::CallOp::create(rewriter, loc, sig.getResults(), + getLiftedLambdaName(counter), callableArgs); + func::ReturnOp::create(rewriter, loc, result.getResults()); } // Create a new lambda function to lift the expression into. 
This function @@ -220,8 +217,8 @@ struct CreateLambdaOpPattern freeValues.getTypes().end()); argTys.append(sig.getInputs().begin(), sig.getInputs().end()); auto funTy = FunctionType::get(ctx, argTys, sig.getResults()); - auto func = rewriter.create( - loc, getLiftedLambdaName(counter), funTy, emptyDict); + auto func = func::FuncOp::create( + rewriter, loc, getLiftedLambdaName(counter), funTy, emptyDict); func.setPrivate(); func->setAttr(cudaq::kernelAttrName, rewriter.getUnitAttr()); auto *entry = func.addEntryBlock(); @@ -256,7 +253,7 @@ struct CreateLambdaOpPattern rewriter.setInsertionPointToEnd(entry); auto nextBlockIter = ++func.getBlocks().begin(); // Connect entry block to cloned code. - rewriter.create(loc, &*nextBlockIter); + cf::BranchOp::create(rewriter, loc, &*nextBlockIter); } SymbolRefAttr closureSymbol = @@ -311,12 +308,12 @@ struct ComputeActionOpPattern if (!actionCallee) return failure(); auto computeArgs = getArgs(comAct.getCompute()); - rewriter.create(loc, TypeRange{}, computeCallee, - /*isAdjoint=*/comAct.getIsDagger(), - ValueRange{}, computeArgs); - rewriter.create(loc, TypeRange{}, actionCallee, - /*isAdjoint=*/false, ValueRange{}, - getArgs(comAct.getAction())); + quake::ApplyOp::create(rewriter, loc, TypeRange{}, computeCallee, + /*isAdjoint=*/comAct.getIsDagger(), ValueRange{}, + computeArgs); + quake::ApplyOp::create(rewriter, loc, TypeRange{}, actionCallee, + /*isAdjoint=*/false, ValueRange{}, + getArgs(comAct.getAction())); rewriter.replaceOpWithNewOp( comAct, TypeRange{}, computeCallee, /*isAdjoint=*/!comAct.getIsDagger(), ValueRange{}, computeArgs); @@ -363,8 +360,8 @@ struct CallCallableOpPattern // For a callable, call the trampoline with the closure data. 
if (auto lambTy = dyn_cast(closureTy)) { - auto dynFunc = rewriter.create( - loc, call.getFunctionType(), closure); + auto dynFunc = cudaq::cc::CallableFuncOp::create( + rewriter, loc, call.getFunctionType(), closure); rewriter.replaceOpWithNewOp(call, dynFunc, operands); return success(); @@ -373,7 +370,7 @@ struct CallCallableOpPattern // For a normal function, there is no closure to deal with. if (auto sig = dyn_cast(closureTy)) { auto dynFunc = - rewriter.create(loc, sig, closure); + cudaq::cc::CallableFuncOp::create(rewriter, loc, sig, closure); rewriter.replaceOpWithNewOp(call, dynFunc, operands.drop_front()); return success(); @@ -436,7 +433,7 @@ class LambdaLiftingPass patterns.insert(ctx, constantPropagation); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) + if (failed(applyPatternsGreedily(module, std::move(patterns)))) signalPassFailure(); } diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index ee38a8dc151..5708d099439 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -8,10 +8,7 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" @@ -46,7 +43,7 @@ class LiftArrayAllocPass LLVM_DEBUG(llvm::dbgs() << "Before lifting constant array: " << func << '\n'); - if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns)))) + if (failed(applyPatternsGreedily(func, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() diff --git a/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc b/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc index e92c22867fd..b9757990b19 100644 --- 
a/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc +++ b/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc @@ -1,5 +1,5 @@ /****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. * * * * This source code and the accompanying materials are made available under * @@ -47,7 +47,7 @@ public: auto valuesAttr = rewriter.getArrayAttr(values); auto loc = alloc.getLoc(); Value conArr = - rewriter.create(loc, arrTy, valuesAttr); + cudaq::cc::ConstantArrayOp::create(rewriter, loc, arrTy, valuesAttr); assert(conArr && "must have created the constant array"); LLVM_DEBUG(llvm::dbgs() << "constant array is:\n" << conArr << '\n'); @@ -84,7 +84,8 @@ public: // load, eleTy, conArr, // ArrayRef{offset}); - auto extractValue = rewriter.create( + auto extractValue = cudaq::cc::ExtractValueOp::create( + rewriter, loc, eleTy, conArr, ArrayRef{offset}); rewriter.replaceAllUsesWith(load, extractValue); @@ -108,7 +109,7 @@ public: if (cannotEraseAlloc) { rewriter.setInsertionPointAfter(alloc); - rewriter.create(loc, conArr, alloc); + cudaq::cc::StoreOp::create(rewriter, loc, conArr, alloc); return success(); } rewriter.eraseOp(alloc); diff --git a/lib/Optimizer/Transforms/LinearCtrlRelations.cpp b/lib/Optimizer/Transforms/LinearCtrlRelations.cpp index 995eec5a365..547e2fbc29e 100644 --- a/lib/Optimizer/Transforms/LinearCtrlRelations.cpp +++ b/lib/Optimizer/Transforms/LinearCtrlRelations.cpp @@ -148,8 +148,8 @@ class LinearCtrlRelationsPass DominanceInfo domInfo(func); RewritePatternSet patterns(ctx); patterns.insert(ctx, domInfo); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) { + if (failed( + applyPatternsGreedily(func.getOperation(), std::move(patterns)))) { signalPassFailure(); } } diff --git a/lib/Optimizer/Transforms/LoopNormalize.cpp 
b/lib/Optimizer/Transforms/LoopNormalize.cpp index 08bfd51bf37..a3e7bb254f2 100644 --- a/lib/Optimizer/Transforms/LoopNormalize.cpp +++ b/lib/Optimizer/Transforms/LoopNormalize.cpp @@ -36,7 +36,7 @@ class LoopNormalizePass auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx, allowClosedInterval, allowBreak); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) { + if (failed(applyPatternsGreedily(op, std::move(patterns)))) { op->emitOpError("could not normalize loop"); signalPassFailure(); } diff --git a/lib/Optimizer/Transforms/LoopNormalizePatterns.inc b/lib/Optimizer/Transforms/LoopNormalizePatterns.inc index eb9b7d33cd5..bed1c04e7d2 100644 --- a/lib/Optimizer/Transforms/LoopNormalizePatterns.inc +++ b/lib/Optimizer/Transforms/LoopNormalizePatterns.inc @@ -1,5 +1,5 @@ /****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. * * * * This source code and the accompanying materials are made available under * @@ -50,19 +50,19 @@ public: } if (c.hasAlwaysFalseCondition()) { - rewriter.startRootUpdate(loop); + rewriter.startOpModification(loop); rewriter.replaceOpWithNewOp(c.compareOp, 0, 1); loop->setAttr(cudaq::opt::DeadLoopAttr, rewriter.getUnitAttr()); - rewriter.finalizeRootUpdate(loop); + rewriter.finalizeOpModification(loop); return success(); } auto loc = loop.getLoc(); // 1) Set initial value to 0. 
auto ty = c.initialValue.getType(); - rewriter.startRootUpdate(loop); + rewriter.startOpModification(loop); auto createConstantOp = [&](std::int64_t val) -> Value { - return rewriter.create(loc, val, ty); + return arith::ConstantIntOp::create(rewriter, loc, ty, val); }; auto zero = createConstantOp(0); loop->setOperand(c.induction, zero); @@ -74,68 +74,68 @@ public: Value step = c.stepValue; Value lower = c.initialValue; if (!c.stepIsAnAddOp()) - step = rewriter.create(loc, zero, step); + step = arith::SubIOp::create(rewriter, loc, zero, step); if (c.isLinearExpr()) { // Induction is part of a linear expression. Deal with the terms of the // equation. `m` scales the step. `b` is an addend to the lower bound. if (c.addendValue) { if (c.negatedAddend) { // `m * i - b`, u += `b`. - upper = rewriter.create(loc, upper, c.addendValue); + upper = arith::AddIOp::create(rewriter, loc, upper, c.addendValue); } else { // `m * i + b`, u -= `b`. - upper = rewriter.create(loc, upper, c.addendValue); + upper = arith::SubIOp::create(rewriter, loc, upper, c.addendValue); } } if (c.minusOneMult) { // `b - m * i` (b eliminated), multiply lower and step by `-1` (`m` // follows). auto negOne = createConstantOp(-1); - lower = rewriter.create(loc, lower, negOne); - step = rewriter.create(loc, step, negOne); + lower = arith::MulIOp::create(rewriter, loc, lower, negOne); + step = arith::MulIOp::create(rewriter, loc, step, negOne); } if (c.scaleValue) { if (c.reciprocalScale) { // `1/m * i + b` (b eliminated), multiply upper by `m`. - upper = rewriter.create(loc, upper, c.scaleValue); + upper = arith::MulIOp::create(rewriter, loc, upper, c.scaleValue); } else { // `m * i + b` (b eliminated), multiple lower and step by `m`. 
- lower = rewriter.create(loc, lower, c.scaleValue); - step = rewriter.create(loc, step, c.scaleValue); + lower = arith::MulIOp::create(rewriter, loc, lower, c.scaleValue); + step = arith::MulIOp::create(rewriter, loc, step, c.scaleValue); } } } if (!c.isClosedIntervalForm()) { // Note: treating the step as a signed value to process countdown loops as // well as countup loops. - Value negStepCond = rewriter.create( - loc, arith::CmpIPredicate::slt, step, zero); + Value negStepCond = arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::slt, step, zero); auto negOne = createConstantOp(-1); Value adj = - rewriter.create(loc, ty, negStepCond, negOne, one); - upper = rewriter.create(loc, upper, adj); + arith::SelectOp::create(rewriter, loc, ty, negStepCond, negOne, one); + upper = arith::SubIOp::create(rewriter, loc, upper, adj); } - Value diff = rewriter.create(loc, upper, lower); - Value disp = rewriter.create(loc, diff, step); + Value diff = arith::SubIOp::create(rewriter, loc, upper, lower); + Value disp = arith::AddIOp::create(rewriter, loc, diff, step); auto cmpOp = cast(c.compareOp); - Value newUpper = rewriter.create(loc, disp, step); + Value newUpper = arith::DivSIOp::create(rewriter, loc, disp, step); if (cudaq::opt::isSignedPredicate(cmpOp.getPredicate())) { - Value noLoopCond = rewriter.create( - loc, arith::CmpIPredicate::sgt, newUpper, zero); - newUpper = - rewriter.create(loc, ty, noLoopCond, newUpper, zero); + Value noLoopCond = arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::sgt, newUpper, zero); + newUpper = arith::SelectOp::create(rewriter, loc, ty, noLoopCond, + newUpper, zero); } // 3) Rewrite the comparison (!=) and step operations (+1). 
Value v1 = c.getCompareInduction(); rewriter.setInsertionPoint(cmpOp); - Value newCmp = rewriter.create( - cmpOp.getLoc(), arith::CmpIPredicate::ne, v1, newUpper); + Value newCmp = arith::CmpIOp::create( + rewriter, cmpOp.getLoc(), arith::CmpIPredicate::ne, v1, newUpper); cmpOp->replaceAllUsesWith(ValueRange{newCmp}); auto v2 = c.stepOp->getOperand( c.stepIsAnAddOp() && c.shouldCommuteStepOp() ? 1 : 0); rewriter.setInsertionPoint(c.stepOp); - auto newStep = rewriter.create(c.stepOp->getLoc(), v2, one); + auto newStep = arith::AddIOp::create(rewriter, c.stepOp->getLoc(), v2, one); c.stepOp->replaceAllUsesWith(ValueRange{newStep.getResult()}); // 4) Compute original induction value as a loop variant and replace the @@ -144,12 +144,12 @@ public: Block *entry = &loop.getBodyRegion().front(); rewriter.setInsertionPointToStart(entry); Value induct = entry->getArgument(c.induction); - auto mul = rewriter.create(loc, induct, c.stepValue); - Value newInd; - if (c.stepIsAnAddOp()) - newInd = rewriter.create(loc, c.initialValue, mul); - else - newInd = rewriter.create(loc, c.initialValue, mul); + auto mul = arith::MulIOp::create(rewriter, loc, induct, c.stepValue); + auto newInd = [&]() -> Value { + if (c.stepIsAnAddOp()) + return arith::AddIOp::create(rewriter, loc, c.initialValue, mul); + return arith::SubIOp::create(rewriter, loc, c.initialValue, mul); + }(); induct.replaceUsesWithIf(newInd, [&](OpOperand &opnd) { auto *op = opnd.getOwner(); return op != newStep.getOperation() && op != mul && @@ -163,20 +163,20 @@ public: if (!loopResult.use_empty()) { rewriter.setInsertionPointAfter(loop); auto mulRes = - rewriter.create(loc, loopResult, c.stepValue); + arith::MulIOp::create(rewriter, loc, loopResult, c.stepValue); Value recovered; if (c.stepIsAnAddOp()) recovered = - rewriter.create(loc, c.initialValue, mulRes); + arith::AddIOp::create(rewriter, loc, c.initialValue, mulRes); else recovered = - rewriter.create(loc, c.initialValue, mulRes); + 
arith::SubIOp::create(rewriter, loc, c.initialValue, mulRes); loopResult.replaceAllUsesExcept(recovered, mulRes.getOperation()); } } loop->setAttr(cudaq::opt::NormalizedLoopAttr, rewriter.getUnitAttr()); - rewriter.finalizeRootUpdate(loop); + rewriter.finalizeOpModification(loop); LLVM_DEBUG(llvm::dbgs() << "loop after normalization: " << loop << '\n'); return success(); } diff --git a/lib/Optimizer/Transforms/LoopPeeling.cpp b/lib/Optimizer/Transforms/LoopPeeling.cpp index b777e654d7b..0db3383ecff 100644 --- a/lib/Optimizer/Transforms/LoopPeeling.cpp +++ b/lib/Optimizer/Transforms/LoopPeeling.cpp @@ -46,8 +46,8 @@ class LoopPat : public OpRewritePattern { for (auto res : loop.getResults()) afterBlock->addArgument(res.getType(), loop.getLoc()); rewriter.setInsertionPointToEnd(oldLoopBlock); - auto finalBranch = rewriter.create(loop.getLoc(), afterBlock, - loop.getResults()); + auto finalBranch = cf::BranchOp::create(rewriter, loop.getLoc(), afterBlock, + loop.getResults()); // NB: the results of the original loop are now split between the peeled // copy of body and the modified new loop. Introduce explicit block // arguments for the phi node functionality. @@ -75,13 +75,13 @@ class LoopPat : public OpRewritePattern { rewriter.cloneRegionBefore(loop.getBodyRegion(), newLoopBlock); Block *firstBlock = beforeBlock->getNextNode(); rewriter.setInsertionPointToEnd(beforeBlock); - rewriter.create(loop.getLoc(), firstBlock, loopArgs); + cf::BranchOp::create(rewriter, loop.getLoc(), firstBlock, loopArgs); // Replace continue ops with branches to the new-loop-block. Replace break // ops with branches to the after-block. 
auto rewriteBranch = [&](auto op, Block *dest) { rewriter.setInsertionPointToEnd(op->getBlock()); - rewriter.create(op.getLoc(), dest, op.getOperands()); + cf::BranchOp::create(rewriter, op.getLoc(), dest, op.getOperands()); rewriter.eraseOp(op); }; for (Block *b = firstBlock; b != newLoopBlock; b = b->getNextNode()) @@ -116,7 +116,7 @@ class LoopPeelingPass auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) { + if (failed(applyPatternsGreedily(op, std::move(patterns)))) { op->emitOpError("could not peel loop"); signalPassFailure(); } diff --git a/lib/Optimizer/Transforms/LoopUnroll.cpp b/lib/Optimizer/Transforms/LoopUnroll.cpp index c6d0bf83eee..af8c9d75ff2 100644 --- a/lib/Optimizer/Transforms/LoopUnroll.cpp +++ b/lib/Optimizer/Transforms/LoopUnroll.cpp @@ -55,7 +55,7 @@ class LoopUnrollPass : public cudaq::opt::impl::LoopUnrollBase { // iteratively propagated. do { progress = 0; - (void)applyPatternsAndFoldGreedily(op, frozen); + (void)applyPatternsGreedily(op, frozen); } while (progress); } diff --git a/lib/Optimizer/Transforms/LoopUnrollPatterns.inc b/lib/Optimizer/Transforms/LoopUnrollPatterns.inc index 210ff9e3eb1..b8aa500dde8 100644 --- a/lib/Optimizer/Transforms/LoopUnrollPatterns.inc +++ b/lib/Optimizer/Transforms/LoopUnrollPatterns.inc @@ -178,7 +178,7 @@ struct UnrollCountedLoop : public OpRewritePattern { // Propagate the previous iteration number into the new block. This makes // any unneeded computation dead. DCE will clean that up as well. iterationOpers[components->induction] = iterCount; - rewriter.create(loc, cloneRange.first, iterationOpers); + cf::BranchOp::create(rewriter, loc, cloneRange.first, iterationOpers); // Bookkeeping for the next iteration, which uses the new continue block, // `conBlock`, and its arguments. 
setIterationOpers(contBlock->getArguments()); @@ -193,7 +193,7 @@ struct UnrollCountedLoop : public OpRewritePattern { setIterationOpers(contBlock->getArguments()); } [[maybe_unused]] auto lastBranch = - rewriter.create(loc, endBlock, iterationOpers); + cf::BranchOp::create(rewriter, loc, endBlock, iterationOpers); rewriter.replaceOp(loop, endBlock->getArguments()); LLVM_DEBUG(llvm::dbgs() << "after unrolling a loop:\n"; @@ -205,7 +205,7 @@ struct UnrollCountedLoop : public OpRewritePattern { static Value getIntegerConstant(Location loc, Type ty, std::int64_t val, PatternRewriter &rewriter) { auto attr = rewriter.getIntegerAttr(ty, val); - return rewriter.create(loc, ty, attr); + return arith::ConstantOp::create(rewriter, loc, ty, attr); } std::size_t threshold; diff --git a/lib/Optimizer/Transforms/LowerToCFG.cpp b/lib/Optimizer/Transforms/LowerToCFG.cpp index 60908717cdd..cd7466cd2cf 100644 --- a/lib/Optimizer/Transforms/LowerToCFG.cpp +++ b/lib/Optimizer/Transforms/LowerToCFG.cpp @@ -9,8 +9,6 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "mlir/IR/PatternMatch.h" @@ -59,8 +57,8 @@ class RewriteScope : public OpRewritePattern { Value stacksave; auto ptrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); if (scopeOp.hasAllocation(/*quantumAllocs=*/false)) { - auto call = rewriter.create( - loc, ptrTy, cudaq::llvmStackSave, ArrayRef{}); + auto call = func::CallOp::create(rewriter, loc, ptrTy, + cudaq::llvmStackSave, ArrayRef{}); stacksave = call.getResult(0); } auto initPos = rewriter.getInsertionPoint(); @@ -71,7 +69,7 @@ class RewriteScope : public OpRewritePattern { endBlock, scopeOp.getResultTypes(), SmallVector(scopeOp.getNumResults(), loc)); scopeResults = continueBlock->getArguments(); - 
rewriter.create(loc, endBlock); + cf::BranchOp::create(rewriter, loc, endBlock); endBlock = continueBlock; } @@ -85,13 +83,12 @@ class RewriteScope : public OpRewritePattern { auto *entryBlock = &scopeOp.getInitRegion().front(); rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, entryBlock, ValueRange{}); + cf::BranchOp::create(rewriter, loc, entryBlock, ValueRange{}); rewriter.inlineRegionBefore(scopeOp.getInitRegion(), endBlock); if (stacksave) { rewriter.setInsertionPointToStart(endBlock); - rewriter.create(loc, ArrayRef{}, - cudaq::llvmStackRestore, - ArrayRef{stacksave}); + func::CallOp::create(rewriter, loc, ArrayRef{}, + cudaq::llvmStackRestore, ArrayRef{stacksave}); } rewriter.replaceOp(scopeOp, scopeResults); return success(); @@ -193,7 +190,7 @@ class RewriteLoop : public OpRewritePattern { Block *continueBlock = rewriter.createBlock( endBlock, loopOp.getResultTypes(), SmallVector(loopOp.getNumResults(), loc)); - rewriter.create(loc, endBlock); + cf::BranchOp::create(rewriter, loc, endBlock); endBlock = continueBlock; } auto comparison = whileCond.getCondition(); @@ -206,14 +203,14 @@ class RewriteLoop : public OpRewritePattern { if (loopOp.isPostConditional()) { // Branch from `initBlock` to getBodyRegion().front(). rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, bodyBlock, loopOperands); + cf::BranchOp::create(rewriter, loc, bodyBlock, loopOperands); // Move the body region blocks between initBlock and end block. rewriter.inlineRegionBefore(loopOp.getBodyRegion(), endBlock); // Replace the condition op with a `cf.cond_br`. rewriter.setInsertionPointToEnd(whileBlock); - rewriter.create(loc, comparison, bodyBlock, - whileCond.getResults(), endBlock, - whileCond.getResults()); + cf::CondBranchOp::create(rewriter, loc, comparison, bodyBlock, + whileCond.getResults(), endBlock, + whileCond.getResults()); rewriter.eraseOp(whileCond); // Move the while region between the body and end block. 
rewriter.inlineRegionBefore(loopOp.getWhileRegion(), endBlock); @@ -222,12 +219,12 @@ class RewriteLoop : public OpRewritePattern { loopOp.hasPythonElse() ? loopOp.getElseEntryBlock() : endBlock; // Branch from `initBlock` to whileRegion().front(). rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, whileBlock, loopOperands); + cf::BranchOp::create(rewriter, loc, whileBlock, loopOperands); // Replace the condition op with a `cf.cond_br` op. rewriter.setInsertionPointToEnd(whileBlock); - rewriter.create(loc, comparison, bodyBlock, - whileCond.getResults(), elseBlock, - whileCond.getResults()); + cf::CondBranchOp::create(rewriter, loc, comparison, bodyBlock, + whileCond.getResults(), elseBlock, + whileCond.getResults()); rewriter.eraseOp(whileCond); // Move the while and body region blocks between initBlock and endBlock. rewriter.inlineRegionBefore(loopOp.getWhileRegion(), endBlock); @@ -238,8 +235,8 @@ class RewriteLoop : public OpRewritePattern { auto *stepBlock = loopOp.getStepBlock(); auto *terminator = stepBlock->getTerminator(); rewriter.setInsertionPointToEnd(stepBlock); - rewriter.create(loc, whileBlock, - terminator->getOperands()); + cf::BranchOp::create(rewriter, loc, whileBlock, + terminator->getOperands()); rewriter.eraseOp(terminator); rewriter.inlineRegionBefore(loopOp.getStepRegion(), endBlock); } diff --git a/lib/Optimizer/Transforms/LowerToCFGPatterns.inc b/lib/Optimizer/Transforms/LowerToCFGPatterns.inc index d9a62e7922f..cfef24dacf0 100644 --- a/lib/Optimizer/Transforms/LowerToCFGPatterns.inc +++ b/lib/Optimizer/Transforms/LowerToCFGPatterns.inc @@ -1,5 +1,5 @@ /****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. 
* * * * This source code and the accompanying materials are made available under * @@ -61,7 +61,7 @@ public: Block *continueBlock = rewriter.createBlock( endBlock, ifOp.getResultTypes(), SmallVector(ifOp.getNumResults(), loc)); - rewriter.create(loc, endBlock); + cf::BranchOp::create(rewriter, loc, endBlock); endBlock = continueBlock; } auto *thenBlock = &ifOp.getThenRegion().front(); @@ -73,9 +73,9 @@ public: if (hasElse) rewriter.inlineRegionBefore(ifOp.getElseRegion(), endBlock); rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, ifOp.getCondition(), thenBlock, - ifOp.getLinearArgs(), elseBlock, - ifOp.getLinearArgs()); + cf::CondBranchOp::create(rewriter, loc, ifOp.getCondition(), thenBlock, + ifOp.getLinearArgs(), elseBlock, + ifOp.getLinearArgs()); rewriter.replaceOp(ifOp, endBlock->getArguments()); return success(); } diff --git a/lib/Optimizer/Transforms/LowerUnwind.cpp b/lib/Optimizer/Transforms/LowerUnwind.cpp index 8746d617cee..22d4b77380c 100644 --- a/lib/Optimizer/Transforms/LowerUnwind.cpp +++ b/lib/Optimizer/Transforms/LowerUnwind.cpp @@ -8,9 +8,6 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" -#include "cudaq/Optimizer/Dialect/CC/CCDialect.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "mlir/IR/Dominance.h" @@ -371,17 +368,17 @@ struct ScopeOpPattern : public OpRewritePattern { SmallVector locs(scope.getNumResults(), loc); Block *continueBlock = rewriter.createBlock(nextBlock, scope.getResultTypes(), locs); - rewriter.create(loc, nextBlock); + cf::BranchOp::create(rewriter, loc, nextBlock); nextBlock = continueBlock; } rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, scopeBlock, ValueRange{}); + cf::BranchOp::create(rewriter, loc, scopeBlock, ValueRange{}); // Normal scope exit with inline deallocations. 
for (auto &pr : termAllocMap) { auto *contOp = pr.first; rewriter.setInsertionPoint(contOp); for (auto a : llvm::reverse(pr.second)) - rewriter.create(a.getLoc(), adjustedDeallocArg(a)); + quake::DeallocOp::create(rewriter, a.getLoc(), adjustedDeallocArg(a)); rewriter.replaceOpWithNewOp(contOp, nextBlock, contOp->getOperands()); } @@ -395,12 +392,13 @@ struct ScopeOpPattern : public OpRewritePattern { if (Block *blk = blockInfo.continueBlock) { rewriter.setInsertionPointToEnd(blk); for (auto a : llvm::reverse(qallocas)) - rewriter.create(a->getLoc(), adjustedDeallocArg(a)); + quake::DeallocOp::create(rewriter, a->getLoc(), + adjustedDeallocArg(a)); if (asPrimitive) { Block *landingPad = getLandingPad(infoMap, scope).continueBlock; - rewriter.create(loc, landingPad, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, landingPad, blk->getArguments()); } else { - rewriter.create(loc, blk->getArguments()); + cudaq::cc::ContinueOp::create(rewriter, loc, blk->getArguments()); } scope.getInitRegion().push_back(blk); } @@ -408,12 +406,13 @@ struct ScopeOpPattern : public OpRewritePattern { if (Block *blk = blockInfo.breakBlock) { rewriter.setInsertionPointToEnd(blk); for (auto a : llvm::reverse(qallocas)) - rewriter.create(a->getLoc(), adjustedDeallocArg(a)); + quake::DeallocOp::create(rewriter, a->getLoc(), + adjustedDeallocArg(a)); if (asPrimitive) { Block *landingPad = getLandingPad(infoMap, scope).breakBlock; - rewriter.create(loc, landingPad, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, landingPad, blk->getArguments()); } else { - rewriter.create(loc, blk->getArguments()); + cudaq::cc::BreakOp::create(rewriter, loc, blk->getArguments()); } scope.getInitRegion().push_back(blk); } @@ -421,10 +420,11 @@ struct ScopeOpPattern : public OpRewritePattern { if (Block *blk = blockInfo.returnBlock) { rewriter.setInsertionPointToEnd(blk); for (auto a : llvm::reverse(qallocas)) - rewriter.create(a->getLoc(), adjustedDeallocArg(a)); + 
quake::DeallocOp::create(rewriter, a->getLoc(), + adjustedDeallocArg(a)); assert(asPrimitive); Block *landingPad = getLandingPad(infoMap, scope).returnBlock; - rewriter.create(loc, landingPad, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, landingPad, blk->getArguments()); scope.getInitRegion().push_back(blk); } } @@ -454,8 +454,7 @@ struct FuncLikeOpPattern : public OpRewritePattern { assert(iter != infoMap.opParentMap.end()); if (!func->hasAttr("add_dealloc")) return success(); - rewriter.updateRootInPlace(func, - [&]() { func->removeAttr("add_dealloc"); }); + rewriter.modifyOpInPlace(func, [&]() { func->removeAttr("add_dealloc"); }); if (!iter->second.asPrimitive) { LLVM_DEBUG(llvm::dbgs() << "func was not marked as primitive in map\n"); return success(); @@ -473,7 +472,7 @@ struct FuncLikeOpPattern : public OpRewritePattern { auto *exitOp = pr.first; rewriter.setInsertionPoint(exitOp); for (auto a : llvm::reverse(pr.second)) - rewriter.create(a.getLoc(), adjustedDeallocArg(a)); + quake::DeallocOp::create(rewriter, a.getLoc(), adjustedDeallocArg(a)); } // Here, we handle the unwind return jumps. 
@@ -492,8 +491,9 @@ struct FuncLikeOpPattern : public OpRewritePattern { if (Block *exitBlock = blockInfo.returnBlock) { rewriter.setInsertionPointToEnd(exitBlock); for (auto a : llvm::reverse(qallocas)) - rewriter.create(a->getLoc(), adjustedDeallocArg(a)); - rewriter.create(func.getLoc(), exitBlock->getArguments()); + quake::DeallocOp::create(rewriter, a->getLoc(), + adjustedDeallocArg(a)); + TERM::create(rewriter, func.getLoc(), exitBlock->getArguments()); func.getBody().push_back(exitBlock); } } @@ -531,7 +531,7 @@ struct IfOpPattern : public OpRewritePattern { Block *continueBlock = rewriter.createBlock( endBlock, ifOp.getResultTypes(), SmallVector(ifOp.getNumResults(), loc)); - rewriter.create(loc, endBlock); + cf::BranchOp::create(rewriter, loc, endBlock); endBlock = continueBlock; } auto *thenBlock = &ifOp.getThenRegion().front(); @@ -555,19 +555,19 @@ struct IfOpPattern : public OpRewritePattern { if (auto *blk = blockInfo.continueBlock) { rewriter.setInsertionPointToEnd(blk); auto *dest = getLandingPad(infoMap, ifOp).continueBlock; - rewriter.create(loc, dest, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, dest, blk->getArguments()); tailRegion.push_back(blk); } if (auto *blk = blockInfo.breakBlock) { rewriter.setInsertionPointToEnd(blk); auto *dest = getLandingPad(infoMap, ifOp).breakBlock; - rewriter.create(loc, dest, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, dest, blk->getArguments()); tailRegion.push_back(blk); } if (auto *blk = blockInfo.returnBlock) { rewriter.setInsertionPointToEnd(blk); auto *dest = getLandingPad(infoMap, ifOp).returnBlock; - rewriter.create(loc, dest, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, dest, blk->getArguments()); tailRegion.push_back(blk); } } @@ -639,7 +639,7 @@ struct LoopOpPattern : public OpRewritePattern { Block *continueBlock = rewriter.createBlock( endBlock, loopOp.getResultTypes(), SmallVector(loopOp.getNumResults(), loc)); - rewriter.create(loc, endBlock); + 
cf::BranchOp::create(rewriter, loc, endBlock); endBlock = continueBlock; } auto comparison = whileCond.getCondition(); @@ -662,19 +662,19 @@ struct LoopOpPattern : public OpRewritePattern { assert(details.allocaDomMap.find(pr.first)->second.empty()); if (auto *blk = blockInfo.continueBlock) { rewriter.setInsertionPointToEnd(blk); - rewriter.create(loc, condBlock, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, condBlock, blk->getArguments()); tailRegion.push_back(blk); } if (auto *blk = blockInfo.breakBlock) { rewriter.setInsertionPointToEnd(blk); - rewriter.create(loc, endBlock, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, endBlock, blk->getArguments()); tailRegion.push_back(blk); } if (auto *blk = blockInfo.returnBlock) { rewriter.setInsertionPointToEnd(blk); auto *retBlk = getLandingPad(infoMap, loopOp).returnBlock; assert(retBlk); - rewriter.create(loc, retBlk, blk->getArguments()); + cf::BranchOp::create(rewriter, loc, retBlk, blk->getArguments()); tailRegion.push_back(blk); } } @@ -684,27 +684,27 @@ struct LoopOpPattern : public OpRewritePattern { if (loopOp.isPostConditional()) { // Branch from `initBlock` to getBodyRegion().front(). rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, bodyBlock, loopOperands); + cf::BranchOp::create(rewriter, loc, bodyBlock, loopOperands); // Move the body region blocks between initBlock and end block. rewriter.inlineRegionBefore(loopOp.getBodyRegion(), endBlock); // Replace the condition op with a `cf.cond_br`. rewriter.setInsertionPointToEnd(whileBlock); - rewriter.create(loc, comparison, bodyBlock, - whileCond.getResults(), endBlock, - whileCond.getResults()); + cf::CondBranchOp::create(rewriter, loc, comparison, bodyBlock, + whileCond.getResults(), endBlock, + whileCond.getResults()); rewriter.eraseOp(whileCond); // Move the while region between the body and end block. 
rewriter.inlineRegionBefore(loopOp.getWhileRegion(), endBlock); } else { // Branch from `initBlock` to whileRegion().front(). rewriter.setInsertionPointToEnd(initBlock); - rewriter.create(loc, whileBlock, loopOperands); + cf::BranchOp::create(rewriter, loc, whileBlock, loopOperands); // Replace the condition op with a `cf.cond_br` op. rewriter.setInsertionPointToEnd(whileBlock); - rewriter.create( - loc, comparison, bodyBlock, whileCond.getResults(), - loopOp.hasPythonElse() ? elseBlock : endBlock, - whileCond.getResults()); + cf::CondBranchOp::create(rewriter, loc, comparison, bodyBlock, + whileCond.getResults(), + loopOp.hasPythonElse() ? elseBlock : endBlock, + whileCond.getResults()); rewriter.eraseOp(whileCond); // Move the while and body region blocks between initBlock and endBlock. rewriter.inlineRegionBefore(loopOp.getWhileRegion(), endBlock); @@ -715,8 +715,8 @@ struct LoopOpPattern : public OpRewritePattern { auto *stepBlock = &loopOp.getStepRegion().front(); auto *terminator = stepBlock->getTerminator(); rewriter.setInsertionPointToEnd(stepBlock); - rewriter.create(loc, whileBlock, - terminator->getOperands()); + cf::BranchOp::create(rewriter, loc, whileBlock, + terminator->getOperands()); rewriter.eraseOp(terminator); rewriter.inlineRegionBefore(loopOp.getStepRegion(), endBlock); } @@ -726,7 +726,8 @@ struct LoopOpPattern : public OpRewritePattern { auto *elseBlock = &loopOp.getElseRegion().front(); auto *terminator = elseBlock->getTerminator(); rewriter.setInsertionPointToEnd(elseBlock); - rewriter.create(loc, endBlock, terminator->getOperands()); + cf::BranchOp::create(rewriter, loc, endBlock, + terminator->getOperands()); rewriter.eraseOp(terminator); rewriter.inlineRegionBefore(loopOp.getElseRegion(), endBlock); } diff --git a/lib/Optimizer/Transforms/Mapping.cpp b/lib/Optimizer/Transforms/Mapping.cpp index c7b1f33d910..d89a75c757a 100644 --- a/lib/Optimizer/Transforms/Mapping.cpp +++ b/lib/Optimizer/Transforms/Mapping.cpp @@ -6,7 +6,7 @@ * the 
terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "PassDetails.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Support/Device.h" #include "cudaq/Support/Placement.h" @@ -14,21 +14,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ScopedPrinter.h" +#include "mlir/Analysis/TopologicalSortUtils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Transforms/TopologicalSortUtils.h" - -#define DEBUG_TYPE "quantum-mapper" - -using namespace mlir; - -// Use specific cudaq elements without bringing in the full namespace -using cudaq::Device; -using cudaq::Placement; -using cudaq::QuantumMeasure; - -//===----------------------------------------------------------------------===// -// Generated logic -//===----------------------------------------------------------------------===// namespace cudaq::opt { #define GEN_PASS_DEF_MAPPINGFUNC @@ -36,6 +23,10 @@ namespace cudaq::opt { #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt +#define DEBUG_TYPE "quantum-mapper" + +using namespace mlir; + namespace { constexpr StringRef mappedWireSetName("mapped_wireset"); @@ -44,9 +35,9 @@ constexpr StringRef mappedWireSetName("mapped_wireset"); // Placement //===----------------------------------------------------------------------===// -void identityPlacement(Placement &placement) { +void identityPlacement(cudaq::Placement &placement) { for (unsigned i = 0, end = placement.getNumVirtualQubits(); i < end; ++i) - placement.map(Placement::VirtualQ(i), Placement::DeviceQ(i)); + placement.map(cudaq::Placement::VirtualQ(i), cudaq::Placement::DeviceQ(i)); } //===----------------------------------------------------------------------===// @@ -57,9 +48,9 @@ void identityPlacement(Placement &placement) { /// about the virtual qubits these 
wires correspond. struct VirtualOp { mlir::Operation *op; - SmallVector qubits; + SmallVector qubits; - VirtualOp(mlir::Operation *op, ArrayRef qubits) + VirtualOp(mlir::Operation *op, ArrayRef qubits) : op(op), qubits(qubits) {} }; @@ -94,13 +85,14 @@ struct VirtualOp { /// measurement mapping until the end, which is required for QIR Base Profile /// programs (see the `allowMeasurementMapping` member variable). class SabreRouter { - using WireMap = DenseMap; - using Swap = std::pair; + using WireMap = DenseMap; + using Swap = std::pair; public: - SabreRouter(const Device &device, WireMap &wireMap, Placement &placement, - unsigned extendedLayerSize, float extendedLayerWeight, - float decayDelta, unsigned roundsDecayReset) + SabreRouter(const cudaq::Device &device, WireMap &wireMap, + cudaq::Placement &placement, unsigned extendedLayerSize, + float extendedLayerWeight, float decayDelta, + unsigned roundsDecayReset) : device(device), wireToVirtualQ(wireMap), placement(placement), extendedLayerSize(extendedLayerSize), extendedLayerWeight(extendedLayerWeight), decayDelta(decayDelta), @@ -130,9 +122,9 @@ class SabreRouter { Swap chooseSwap(); private: - const Device &device; + const cudaq::Device &device; WireMap &wireToVirtualQ; - Placement &placement; + cudaq::Placement &placement; // Parameters const unsigned extendedLayerSize; @@ -145,7 +137,7 @@ class SabreRouter { SmallVector extendedLayer; SmallVector measureLayer; llvm::SmallPtrSet measureLayerSet; - llvm::SmallSet involvedPhy; + llvm::SmallSet involvedPhy; SmallVector phyDecay; SmallVector phyToWire; @@ -181,11 +173,12 @@ void SabreRouter::visitUsers(ResultRange::user_range users, } else { auto wires = quake::getQuantumOperands(user); if (entry->second == wires.size()) { - SmallVector qubits; + SmallVector qubits; for (auto wire : wires) qubits.push_back(wireToVirtualQ[wire]); // Don't process measurements until we're ready - if (allowMeasurementMapping || !user->hasTrait()) { + if (allowMeasurementMapping || + 
!user->hasTrait()) { layer.emplace_back(user, qubits); } else { // Add to measureLayer. Don't add duplicates. @@ -201,13 +194,14 @@ void SabreRouter::visitUsers(ResultRange::user_range users, LogicalResult SabreRouter::mapOperation(VirtualOp &virtOp) { // Take the device qubits from this operation. - SmallVector deviceQubits; + SmallVector deviceQubits; for (auto vr : virtOp.qubits) deviceQubits.push_back(placement.getPhy(vr)); // An operation cannot be mapped if it is not a measurement and uses two // qubits virtual qubit that are no adjacently placed. - if (!virtOp.op->hasTrait() && deviceQubits.size() == 2 && + if (!virtOp.op->hasTrait() && + deviceQubits.size() == 2 && !device.areConnected(deviceQubits[0], deviceQubits[1])) return failure(); @@ -280,7 +274,7 @@ void SabreRouter::selectExtendedLayer() { for (VirtualOp &virtOp : newTmpLayer) // We only add operations that can influence placement to the extended // frontlayer, i.e., quantum operators that use two qubits. - if (!virtOp.op->hasTrait() && + if (!virtOp.op->hasTrait() && quake::getQuantumOperands(virtOp.op).size() == 2) extendedLayer.emplace_back(virtOp); tmpLayer = std::move(newTmpLayer); @@ -382,10 +376,11 @@ void SabreRouter::route(Block &block, ArrayRef sources) { OpBuilder builder(&block, block.begin()); auto wireType = builder.getType(); - auto addSwap = [&](Placement::DeviceQ q0, Placement::DeviceQ q1) { + auto addSwap = [&](cudaq::Placement::DeviceQ q0, + cudaq::Placement::DeviceQ q1) { placement.swap(q0, q1); - auto swap = builder.create( - builder.getUnknownLoc(), TypeRange{wireType, wireType}, false, + auto swap = quake::SwapOp::create( + builder, builder.getUnknownLoc(), TypeRange{wireType, wireType}, false, ValueRange{}, ValueRange{}, ValueRange{phyToWire[q0.index], phyToWire[q1.index]}, DenseBoolArrayAttr{}); @@ -434,7 +429,7 @@ void SabreRouter::route(Block &block, ArrayRef sources) { LLVM_DEBUG(logger.startLine() << '\n' << logLineComment << '\n';); } -std::pair> +std::pair> 
deviceFromString(llvm::StringRef deviceString) { std::size_t deviceDim[2]; deviceDim[0] = deviceDim[1] = 0; @@ -476,7 +471,7 @@ deviceFromString(llvm::StringRef deviceString) { return std::make_pair(false, std::nullopt); } - return std::make_pair(false, Device::file(deviceFilename)); + return std::make_pair(false, cudaq::Device::file(deviceFilename)); } else { if (deviceString.consume_front("(")) { deviceString = deviceString.ltrim(); @@ -505,13 +500,15 @@ deviceFromString(llvm::StringRef deviceString) { } if (deviceTopoStr == "path") { - return std::make_pair(false, Device::path(deviceDim[0])); + return std::make_pair(false, cudaq::Device::path(deviceDim[0])); } else if (deviceTopoStr == "ring") { - return std::make_pair(false, Device::ring(deviceDim[0])); + return std::make_pair(false, cudaq::Device::ring(deviceDim[0])); } else if (deviceTopoStr == "star") { - return std::make_pair(false, Device::star(deviceDim[0], deviceDim[1])); + return std::make_pair(false, + cudaq::Device::star(deviceDim[0], deviceDim[1])); } else if (deviceTopoStr == "grid") { - return std::make_pair(false, Device::grid(deviceDim[0], deviceDim[1])); + return std::make_pair(false, + cudaq::Device::grid(deviceDim[0], deviceDim[1])); } else if (deviceTopoStr == "bypass") { return std::make_pair(true, std::nullopt); } else { @@ -528,7 +525,7 @@ deviceFromString(llvm::StringRef deviceString) { struct MappingPrep : public cudaq::opt::impl::MappingPrepBase { using MappingPrepBase::MappingPrepBase; - std::optional deviceInstance; + std::optional deviceInstance; bool deviceBypass = false; virtual LogicalResult initialize(MLIRContext *context) override { @@ -542,13 +539,14 @@ struct MappingPrep : public cudaq::opt::impl::MappingPrepBase { } /// Create an adjacency matrix attribute for a WireSetOp. 
- SparseElementsAttr getAdjacencyFromDevice(Device &d, MLIRContext *ctx) { + SparseElementsAttr getAdjacencyFromDevice(cudaq::Device &d, + MLIRContext *ctx) { int numEdges = 0; unsigned int qubitCardinality = static_cast(d.getNumQubits()); SmallVector edgeVector; for (unsigned int i = 0; i < qubitCardinality; i++) { - auto neighbors = d.getNeighbours(Device::Qubit(i)); + auto neighbors = d.getNeighbours(cudaq::Device::Qubit(i)); numEdges += neighbors.size(); for (auto neighbor : neighbors) { edgeVector.emplace_back(64, i); @@ -570,15 +568,15 @@ struct MappingPrep : public cudaq::opt::impl::MappingPrepBase { return sparseInt; } - quake::WireSetOp insertWireSetOpForDevice(Device &d, ModuleOp mod) { + quake::WireSetOp insertWireSetOpForDevice(cudaq::Device &d, ModuleOp mod) { if (auto wires = mod.lookupSymbol(mappedWireSetName)) return wires; auto adjacency = getAdjacencyFromDevice(d, mod.getContext()); OpBuilder builder(mod.getBodyRegion()); - auto wireSetOp = builder.create( - builder.getUnknownLoc(), mappedWireSetName, d.getNumQubits(), - adjacency); + auto wireSetOp = quake::WireSetOp::create(builder, builder.getUnknownLoc(), + mappedWireSetName, + d.getNumQubits(), adjacency); wireSetOp.setPrivate(); return wireSetOp; } @@ -597,7 +595,7 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { using MappingFuncBase::MappingFuncBase; bool deviceBypass = false; - std::optional deviceInstance; + std::optional deviceInstance; virtual LogicalResult initialize(MLIRContext *context) override { std::tie(deviceBypass, deviceInstance) = deviceFromString(device); @@ -705,7 +703,7 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { SmallVector sources(deviceNumQubits); SmallVector returnsToRemove; - DenseMap wireToVirtualQ; + DenseMap wireToVirtualQ; SmallVector userQubitsMeasured; DenseMap finalQubitWire; Operation *lastSource = nullptr; @@ -713,7 +711,7 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { if (auto qop = dyn_cast(op)) { 
// Assign a new virtual qubit to the resulting wire. auto id = qop.getIdentity(); - wireToVirtualQ[qop.getResult()] = Placement::VirtualQ(id); + wireToVirtualQ[qop.getResult()] = cudaq::Placement::VirtualQ(id); finalQubitWire[id] = qop.getResult(); sources[id] = qop; lastSource = &op; @@ -760,7 +758,7 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { // Get the wire operands and check if the operators uses at most two // qubits. N.B: Measurements do not have this restriction. auto wireOperands = quake::getQuantumOperands(&op); - if (!op.hasTrait() && wireOperands.size() > 2) { + if (!op.hasTrait() && wireOperands.size() > 2) { if (nonComposable) { func.emitError("Cannot map a kernel with operators that use more " "than two qubits."); @@ -820,11 +818,11 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { Type resTy = builder.getI1Type(); for (unsigned i = 0; i < sources.size(); i++) { if (sources[i] != nullptr) { - auto measureOp = builder.create( - finalQubitWire[i].getLoc(), TypeRange{measTy, wireTy}, - finalQubitWire[i]); - builder.create(finalQubitWire[i].getLoc(), - resTy, measureOp.getMeasOut()); + auto measureOp = + quake::MzOp::create(builder, finalQubitWire[i].getLoc(), + TypeRange{measTy, wireTy}, finalQubitWire[i]); + quake::DiscriminateOp::create(builder, finalQubitWire[i].getLoc(), + resTy, measureOp.getMeasOut()); wireToVirtualQ.insert( {measureOp.getWires()[0], wireToVirtualQ[finalQubitWire[i]]}); @@ -848,15 +846,15 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { builder.setInsertionPointAfter(lastSource); for (unsigned i = 0; i < deviceInstance->getNumQubits(); i++) { if (!sources[i]) { - auto borrowOp = builder.create( - unknownLoc, wireTy, mappedWireSetName, i); - wireToVirtualQ[borrowOp.getResult()] = Placement::VirtualQ(i); + auto borrowOp = quake::BorrowWireOp::create(builder, unknownLoc, wireTy, + mappedWireSetName, i); + wireToVirtualQ[borrowOp.getResult()] = cudaq::Placement::VirtualQ(i); 
sources[i] = borrowOp; } } // Place - Placement placement(sources.size(), deviceInstance->getNumQubits()); + cudaq::Placement placement(sources.size(), deviceInstance->getNumQubits()); identityPlacement(placement); // Route @@ -881,13 +879,13 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { // unsigned highestMappedQubit = 0; builder.setInsertionPoint(block.getTerminator()); auto phyToWire = router.getPhyToWire(); - for (auto &[i, s] : llvm::enumerate(sources)) { + for (const auto &[i, s] : llvm::enumerate(sources)) { if (s->getUsers().empty()) { s->erase(); } else { // highestMappedQubit = i; - builder.create(phyToWire[i].getLoc(), - phyToWire[i]); + quake::ReturnWireOp::create(builder, phyToWire[i].getLoc(), + phyToWire[i]); } } @@ -900,9 +898,9 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { // dataForOriginalQubit[v] = dataFromBackendQubit[mapping_v2p[v]]; llvm::SmallVector attrs(*highestIdentity + 1); for (unsigned int v = 0; v < *highestIdentity + 1; v++) - attrs[v] = - IntegerAttr::get(builder.getIntegerType(64), - placement.getPhy(Placement::VirtualQ(v)).index); + attrs[v] = IntegerAttr::get( + builder.getIntegerType(64), + placement.getPhy(cudaq::Placement::VirtualQ(v)).index); func->setAttr("mapping_v2p", builder.getArrayAttr(attrs)); @@ -919,7 +917,7 @@ struct MappingFunc : public cudaq::opt::impl::MappingFuncBase { measuredQubits.reserve(userQubitsMeasured.size()); for (auto mq : userQubitsMeasured) { measuredQubits.emplace_back( - mq, placement.getPhy(Placement::VirtualQ(mq)).index); + mq, placement.getPhy(cudaq::Placement::VirtualQ(mq)).index); } // First sort the pairs according to the physical qubits. llvm::sort(measuredQubits, diff --git a/lib/Optimizer/Transforms/MemToReg.cpp b/lib/Optimizer/Transforms/MemToReg.cpp index 0cece166036..0bf0d7593ab 100644 --- a/lib/Optimizer/Transforms/MemToReg.cpp +++ b/lib/Optimizer/Transforms/MemToReg.cpp @@ -17,7 +17,6 @@ /// load/store form (QLS), is required and performed. 
#include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "llvm/ADT/MapVector.h" @@ -211,16 +210,21 @@ class RegionDataFlow { // Stitch together the control-flow across op's regions. if (auto regionOp = dyn_cast(op)) { SmallVector successors; - regionOp.getSuccessorRegions(std::nullopt, {}, successors); + regionOp.getSuccessorRegions(RegionBranchPoint::parent(), successors); for (auto iter : successors) - if (iter.getSuccessor()) + if (iter.getSuccessor() && !iter.getSuccessor()->empty()) entryCFG.insert(&iter.getSuccessor()->front()); for (auto ®ion : op->getRegions()) { + if (region.empty()) + continue; SmallVector regionExitBlocks; for (auto &b : region) if (b.hasNoSuccessors()) regionExitBlocks.push_back(&b); - regionOp.getSuccessorRegions(region.getRegionNumber(), {}, successors); + auto *terminator = region.back().getTerminator(); + if (auto terminatorOp = + dyn_cast(terminator)) + regionOp.getSuccessorRegions(terminatorOp, successors); // Every region has exactly one entry and one or more exits. 
for (auto *b : regionExitBlocks) for (auto iter : successors) { @@ -315,9 +319,9 @@ class RegionDataFlow { SSAReg reloadMemoryReference(OpBuilder &builder, MemRef mr) { if (isa(mr.getType())) { auto wireTy = quake::WireType::get(builder.getContext()); - return builder.create(mr.getLoc(), wireTy, mr); + return quake::UnwrapOp::create(builder, mr.getLoc(), wireTy, mr); } - return builder.create(mr.getLoc(), mr); + return cudaq::cc::LoadOp::create(builder, mr.getLoc(), mr); } SSAReg unsafeAddLiveInToBlock(Block *block, MemRef mr) { @@ -550,9 +554,9 @@ class ResetOpPattern : public OpRewritePattern { auto wireTy = quake::WireType::get(rewriter.getContext()); auto opnd = op.getTargets(); assert(opnd.getType() == quake::RefType::get(rewriter.getContext())); - Value target = rewriter.create(loc, wireTy, opnd); + Value target = quake::UnwrapOp::create(rewriter, loc, wireTy, opnd); auto newOp = - rewriter.create(loc, TypeRange{wireTy}, target); + quake::ResetOp::create(rewriter, loc, TypeRange{wireTy}, target); rewriter.replaceOpWithNewOp(op, newOp.getResult(0), opnd); return success(); } @@ -568,7 +572,7 @@ class DeallocOpPattern : public OpRewritePattern { auto wireTy = quake::WireType::get(rewriter.getContext()); auto opnd = op.getReference(); assert(isa(opnd.getType())); - Value target = rewriter.create(loc, wireTy, opnd); + Value target = quake::UnwrapOp::create(rewriter, loc, wireTy, opnd); rewriter.replaceOpWithNewOp(op, target); return success(); } @@ -594,7 +598,7 @@ class Wrapper : public OpRewritePattern { for (auto opnd : op.getControls()) { auto opndTy = opnd.getType(); if (opndTy == qrefTy) { - auto unwrap = rewriter.create(loc, wireTy, opnd); + auto unwrap = quake::UnwrapOp::create(rewriter, loc, wireTy, opnd); unwrapCtrls.push_back(unwrap); } else { unwrapCtrls.push_back(opnd); @@ -605,7 +609,7 @@ class Wrapper : public OpRewritePattern { for (auto opnd : op.getTargets()) { auto opndTy = opnd.getType(); if (opndTy == qrefTy) { - auto unwrap = 
rewriter.create(loc, wireTy, opnd); + auto unwrap = quake::UnwrapOp::create(rewriter, loc, wireTy, opnd); unwrapTargs.push_back(unwrap); } else { unwrapTargs.push_back(opnd); @@ -619,8 +623,8 @@ class Wrapper : public OpRewritePattern { auto opndTy = i.value().getType(); auto offset = i.index() + addend; if (opndTy == qrefTy) { - rewriter.create(loc, newOp.getResult(offset), - i.value()); + quake::WrapOp::create(rewriter, loc, newOp.getResult(offset), + i.value()); } else if (opndTy == wireTy) { op.getResult(count++).replaceAllUsesWith(newOp.getResult(offset)); } @@ -633,8 +637,8 @@ class Wrapper : public OpRewritePattern { SmallVector newTy = {op.getMeasOut().getType()}; SmallVector wireTys(unwrapTargs.size(), wireTy); newTy.append(wireTys.begin(), wireTys.end()); - auto newOp = rewriter.create(loc, newTy, unwrapTargs, - op.getRegisterNameAttr()); + auto newOp = OP::create(rewriter, loc, newTy, unwrapTargs, + op.getRegisterNameAttr()); SmallVector wireOperands = op.getTargets(); op.getResult(0).replaceAllUsesWith(newOp.getResult(0)); threadWires(wireOperands, newOp, 1); @@ -644,9 +648,9 @@ class Wrapper : public OpRewritePattern { // propagated to wrap operations. 
auto numberOfWires = wireCount(unwrapCtrls, unwrapTargs); SmallVector wireTys{numberOfWires, wireTy}; - auto newOp = rewriter.create( - loc, wireTys, op.getIsAdjAttr(), op.getParameters(), unwrapCtrls, - unwrapTargs, op.getNegatedQubitControlsAttr()); + auto newOp = OP::create(rewriter, loc, wireTys, op.getIsAdjAttr(), + op.getParameters(), unwrapCtrls, unwrapTargs, + op.getNegatedQubitControlsAttr()); auto wireOperands = filteredByType(qrefTy, op.getControls(), op.getTargets()); threadWires(wireOperands, newOp, 0); @@ -726,8 +730,12 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { op->erase(); } for (auto wrap : wrapOps) { - auto ref = wrap.getRefValue(); - auto wire = wrap.getWireValue(); + // In LLVM 22, the typed accessors (getRefValue/getWireValue) perform + // llvm::cast> which crashes on null operands. After + // erasing other ops above (with dropAllUses), WrapOp operands may be + // null. Use raw getOperand() to safely check for null. + Value ref = wrap->getOperand(1); // ref_value is operand 1 + Value wire = wrap->getOperand(0); // wire_value is operand 0 if (!ref || !wire.hasOneUse()) { LLVM_DEBUG(llvm::dbgs() << "erasing: "; wrap->dump(); llvm::dbgs() << '\n'); @@ -771,7 +779,7 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { elseRegion.push_back(block); OpBuilder builder(ctx); builder.setInsertionPointToEnd(block); - builder.create(ifOp.getLoc()); + cudaq::cc::ContinueOp::create(builder, ifOp.getLoc()); } } @@ -799,7 +807,7 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { OpBuilder builder(ctx); builder.setInsertionPointToStart(block); Value v = - builder.create(arg.getLoc(), wireTy, arg); + quake::UnwrapOp::create(builder, arg.getLoc(), wireTy, arg); dataFlow.addBinding(block, arg, v); } } @@ -823,7 +831,7 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { if (!dataFlow.hasBinding(block, alloc)) { OpBuilder builder(alloc); Value v = - builder.create(alloc.getLoc(), wireTy); + 
quake::NullWireOp::create(builder, alloc.getLoc(), wireTy); cleanUps.insert(alloc); dataFlow.addBinding(block, alloc, v); } @@ -838,14 +846,14 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { for (auto v : op->getOperands()) if (v.getType() == qrefTy && dataFlow.hasBinding(block, v)) if (auto vBinding = dataFlow.getBinding(block, v)) { - builder.create(op->getLoc(), vBinding, v); + quake::WrapOp::create(builder, op->getLoc(), vBinding, v); dataFlow.cancelBinding(block, v); } builder.setInsertionPointAfter(op); for (auto r : op->getResults()) if (r.getType() == qrefTy) { Value v = - builder.create(op->getLoc(), wireTy, r); + quake::UnwrapOp::create(builder, op->getLoc(), wireTy, r); dataFlow.addBinding(block, r, v); } } @@ -858,8 +866,8 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { if (memAnalysis.isMember(alloc)) { if (classicalValues && !dataFlow.hasBinding(block, alloc)) { OpBuilder builder(alloc); - Value v = builder.create( - alloc.getLoc(), alloc.getElementType()); + Value v = cudaq::cc::UndefOp::create(builder, alloc.getLoc(), + alloc.getElementType()); cleanUps.insert(alloc); dataFlow.addBinding(block, alloc, v); } @@ -981,7 +989,7 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { if ((v.getType() == qrefTy) && dataFlow.hasBinding(block, v)) if (auto vBinding = dataFlow.getBinding(block, v)) { OpBuilder builder(op); - builder.create(op->getLoc(), vBinding, v); + quake::WrapOp::create(builder, op->getLoc(), vBinding, v); dataFlow.cancelBinding(block, v); } @@ -1051,9 +1059,9 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { auto oldVal = dataFlow.getBinding(block, liveOut); if (!oldVal) { OpBuilder builder(term); - oldVal = builder.create( - term->getLoc(), quake::WireType::get(builder.getContext()), - liveOut); + oldVal = quake::UnwrapOp::create( + builder, term->getLoc(), + quake::WireType::get(builder.getContext()), liveOut); } addTerminatorArgument(term, target, oldVal); } else if ((usePromo || 
@@ -1106,14 +1114,15 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { SmallVector resultTypes(parent->getResultTypes()); for (auto d : allDefs) resultTypes.push_back(dereferencedType(d.getType())); - ConversionPatternRewriter builder(ctx); + IRRewriter builder(ctx); builder.setInsertionPoint(parent); SmallVector operands(parent->getOperands()); operands.insert(operands.end(), dataFlow.getLiveInArgs().begin(), dataFlow.getLiveInArgs().end()); Operation *np = Operation::create( parent->getLoc(), parent->getName(), resultTypes, operands, - parent->getAttrs(), parent->getSuccessors(), parent->getNumRegions()); + parent->getAttrs(), OpaqueProperties{nullptr}, + parent->getSuccessors(), parent->getNumRegions()); builder.insert(np); for (unsigned i = 0; i < parent->getNumRegions(); ++i) builder.inlineRegionBefore(parent->getRegion(i), np->getRegion(i), @@ -1124,11 +1133,11 @@ class MemToRegPass : public cudaq::opt::impl::MemToRegBase { for (auto iter : llvm::enumerate(allDefs)) { auto i = iter.index() + parent->getNumResults(); if (np->getResult(i).getType() == wireTy) - builder.create(np->getLoc(), np->getResult(i), - iter.value()); + quake::WrapOp::create(builder, np->getLoc(), np->getResult(i), + iter.value()); else - builder.create(np->getLoc(), np->getResult(i), - iter.value()); + cudaq::cc::StoreOp::create(builder, np->getLoc(), np->getResult(i), + iter.value()); } cleanUps.insert(parent); parent = np; diff --git a/lib/Optimizer/Transforms/MultiControlDecomposition.cpp b/lib/Optimizer/Transforms/MultiControlDecomposition.cpp index c05753a7800..d1b586b4c8e 100644 --- a/lib/Optimizer/Transforms/MultiControlDecomposition.cpp +++ b/lib/Optimizer/Transforms/MultiControlDecomposition.cpp @@ -7,23 +7,17 @@ ******************************************************************************/ #include "DecompositionPatterns.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" +#include "PassDetails.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeInterfaces.h" 
-#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" -using namespace mlir; -using namespace cudaq; - -//===----------------------------------------------------------------------===// -// Generated logic -//===----------------------------------------------------------------------===// - namespace cudaq::opt { #define GEN_PASS_DEF_MULTICONTROLDECOMPOSITION #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt +using namespace mlir; + //===----------------------------------------------------------------------===// // Helpers //===----------------------------------------------------------------------===// @@ -35,10 +29,10 @@ static Operation *createOperator(Location loc, StringRef name, SmallVector operands(parameters); operands.append(controls.begin(), controls.end()); operands.append(targets.begin(), targets.end()); - auto segmentSizes = - builder.getDenseI32ArrayAttr({static_cast(parameters.size()), - static_cast(controls.size()), - static_cast(targets.size())}); + auto segmentSizes = builder.getDenseI32ArrayAttr( + {static_cast(parameters.size()), + static_cast(controls.size()), + static_cast(targets.size())}); auto op = builder.create(loc, nameAttr, operands); op->setAttr("operand_segment_sizes", segmentSizes); return op; @@ -87,7 +81,7 @@ Decomposer::extractControls(quake::OperatorInterface op, size = veq.getSize(); for (size_t i = 0; i < size; ++i) newControls.push_back( - builder.create(op.getLoc(), control, i)); + quake::ExtractRefOp::create(builder, op.getLoc(), control, i)); } if (negControls) negatedControls.append(size, (*negControls)[index]); @@ -100,7 +94,7 @@ ArrayRef Decomposer::getAncillas(Location loc, std::size_t numAncillas) { builder.setInsertionPointToStart(entryBlock); // If we don't have enough ancillas, allocate some more. 
for (size_t i = allocatedAncillas.size(); i < numAncillas; ++i) - allocatedAncillas.push_back(builder.create(loc)); + allocatedAncillas.push_back(quake::AllocaOp::create(builder, loc)); return {allocatedAncillas.begin(), allocatedAncillas.begin() + numAncillas}; } @@ -137,14 +131,14 @@ LogicalResult Decomposer::v_decomposition(quake::OperatorInterface op) { // Compute intermediate results SmallVector toCleanup; std::array cs = {controls[0], controls[1]}; - toCleanup.push_back(builder.create(loc, cs, ancillas[0])); + toCleanup.push_back(quake::XOp::create(builder, loc, cs, ancillas[0])); if (!negatedControls.empty() && (negatedControls[0] || negatedControls[1])) toCleanup.back()->setAttr("negated_qubit_controls", builder.getDenseBoolArrayAttr( {negatedControls[0], negatedControls[1]})); for (std::size_t c = 2, a = 0, n = requiredAncillas + 1; c < n; ++c, ++a) { cs = {controls[c], ancillas[a]}; - toCleanup.push_back(builder.create(loc, cs, ancillas[a + 1])); + toCleanup.push_back(quake::XOp::create(builder, loc, cs, ancillas[a + 1])); if (!negatedControls.empty() && negatedControls[c]) toCleanup.back()->setAttr("negated_qubit_controls", builder.getDenseBoolArrayAttr({true, false})); @@ -174,7 +168,7 @@ LogicalResult Decomposer::v_decomposition(quake::OperatorInterface op) { //===----------------------------------------------------------------------===// namespace { struct Decomposition - : public opt::impl::MultiControlDecompositionBase { + : public cudaq::opt::impl::MultiControlDecompositionBase { using MultiControlDecompositionBase::MultiControlDecompositionBase; void runOnOperation() override { @@ -194,5 +188,4 @@ struct Decomposition }); } }; - } // namespace diff --git a/lib/Optimizer/Transforms/ObserveAnsatz.cpp b/lib/Optimizer/Transforms/ObserveAnsatz.cpp index 184f9e91984..623ba6b6eae 100644 --- a/lib/Optimizer/Transforms/ObserveAnsatz.cpp +++ b/lib/Optimizer/Transforms/ObserveAnsatz.cpp @@ -6,7 +6,7 @@ * the terms of the Apache License 2.0 which accompanies 
this distribution. * ******************************************************************************/ -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "PassDetails.h" #include "cudaq/Optimizer/Transforms/Passes.h" namespace cudaq::opt { @@ -29,31 +29,32 @@ void appendMeasurement(MeasureBasis &basis, OpBuilder &builder, Location &loc, // Value semantics auto wireTy = quake::WireType::get(builder.getContext()); if (basis == MeasureBasis::X) { - auto newOp = builder.create( - loc, TypeRange{wireTy}, /*is_adj=*/false, ValueRange{}, ValueRange{}, - targets, DenseBoolArrayAttr{}); + auto newOp = quake::HOp::create( + builder, loc, TypeRange{wireTy}, /*is_adj=*/false, ValueRange{}, + ValueRange{}, targets, DenseBoolArrayAttr{}); qubit.replaceAllUsesExcept(newOp.getResult(0), newOp); qubit = newOp.getResult(0); } else if (basis == MeasureBasis::Y) { llvm::APFloat d(M_PI_2); Value rotation = - builder.create(loc, d, builder.getF64Type()); - auto newOp = builder.create( - loc, TypeRange{wireTy}, /*is_adj=*/false, ValueRange{rotation}, - ValueRange{}, ValueRange{qubit}, DenseBoolArrayAttr{}); + arith::ConstantFloatOp::create(builder, loc, builder.getF64Type(), d); + auto newOp = + quake::RxOp::create(builder, loc, TypeRange{wireTy}, /*is_adj=*/false, + ValueRange{rotation}, ValueRange{}, + ValueRange{qubit}, DenseBoolArrayAttr{}); qubit.replaceAllUsesExcept(newOp.getResult(0), newOp); qubit = newOp.getResult(0); } } else { // Reference semantics if (basis == MeasureBasis::X) { - builder.create(loc, ValueRange{}, targets); + quake::HOp::create(builder, loc, ValueRange{}, targets); } else if (basis == MeasureBasis::Y) { llvm::APFloat d(M_PI_2); Value rotation = - builder.create(loc, d, builder.getF64Type()); + arith::ConstantFloatOp::create(builder, loc, builder.getF64Type(), d); SmallVector params{rotation}; - builder.create(loc, params, ValueRange{}, targets); + quake::RxOp::create(builder, loc, params, ValueRange{}, targets); } } } @@ -304,7 +305,7 @@ class 
ObserveAnsatzPass auto veqOp = seekIndexed->second.first; auto index = seekIndexed->second.second; auto extractRef = - builder.create(loc, veqOp, index); + quake::ExtractRefOp::create(builder, loc, veqOp, index); qubitVal = extractRef.getResult(); } else { qubitVal = seek->second; @@ -321,19 +322,19 @@ class ObserveAnsatzPass auto measTy = quake::MeasureType::get(builder.getContext()); auto wireTy = quake::WireType::get(builder.getContext()); - for (auto &[measureNum, qubitToMeasure] : + for (const auto &[measureNum, qubitToMeasure] : llvm::enumerate(qubitsToMeasure)) { // add the measure char regName[16]; std::snprintf(regName, sizeof(regName), "r%05lu", measureNum); if (quake::isLinearType(qubitToMeasure.getType())) { - auto newOp = builder.create( - loc, TypeRange{measTy, wireTy}, ValueRange{qubitToMeasure}, + auto newOp = quake::MzOp::create( + builder, loc, TypeRange{measTy, wireTy}, ValueRange{qubitToMeasure}, builder.getStringAttr(regName)); qubitToMeasure.replaceAllUsesExcept(newOp.getResult(1), newOp); } else { - builder.create(loc, measTy, qubitToMeasure, - builder.getStringAttr(regName)); + quake::MzOp::create(builder, loc, measTy, qubitToMeasure, + builder.getStringAttr(regName)); } } diff --git a/lib/Optimizer/Transforms/PassDetails.h b/lib/Optimizer/Transforms/PassDetails.h index 5927f6b04e3..1246351fa19 100644 --- a/lib/Optimizer/Transforms/PassDetails.h +++ b/lib/Optimizer/Transforms/PassDetails.h @@ -9,22 +9,19 @@ #pragma once #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Math/IR/Math.h" 
#include "mlir/Pass/Pass.h" #include "mlir/Pass/PassRegistry.h" -namespace cudaq::opt { - -#define GEN_PASS_CLASSES -#include "cudaq/Optimizer/Transforms/Passes.h.inc" - -} // namespace cudaq::opt - #define GATE_OPS(MACRO) \ MACRO(XOp), MACRO(YOp), MACRO(ZOp), MACRO(HOp), MACRO(SOp), MACRO(TOp), \ MACRO(SwapOp), MACRO(R1Op), MACRO(RxOp), MACRO(PhasedRxOp), MACRO(RyOp), \ diff --git a/lib/Optimizer/Transforms/PhaseFolding.cpp b/lib/Optimizer/Transforms/PhaseFolding.cpp index 959b17d910b..f2a07aba2cc 100644 --- a/lib/Optimizer/Transforms/PhaseFolding.cpp +++ b/lib/Optimizer/Transforms/PhaseFolding.cpp @@ -7,8 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -528,7 +526,7 @@ class PhaseStorage { auto rot_arg2 = rzop.getOperand(0); auto builder = OpBuilder(rzop); auto new_rot_arg = - builder.create(rzop.getLoc(), rot_arg1, rot_arg2); + arith::AddFOp::create(builder, rzop.getLoc(), rot_arg1, rot_arg2); rzop->setOperand(0, new_rot_arg.getResult()); old_rzop.erase(); rotations[prev_idx] = rzop; diff --git a/lib/Optimizer/Transforms/Pipelines.cpp b/lib/Optimizer/Transforms/Pipelines.cpp index b52da3e3474..c7f019d9d3f 100644 --- a/lib/Optimizer/Transforms/Pipelines.cpp +++ b/lib/Optimizer/Transforms/Pipelines.cpp @@ -124,8 +124,8 @@ void cudaq::opt::addDecomposition(OpPassManager &pm, // NB: Both of these ListOption *must* be set here or they may contain garbage // and the compiler may crash. 
cudaq::opt::DecompositionOptions opts; - opts.disabledPatterns = disabledPats; - opts.enabledPatterns = enabledPats; + opts.disabledPatterns.assign(disabledPats.begin(), disabledPats.end()); + opts.enabledPatterns.assign(enabledPats.begin(), enabledPats.end()); pm.addPass(cudaq::opt::createDecomposition(opts)); } diff --git a/lib/Optimizer/Transforms/PruneCtrlRelations.cpp b/lib/Optimizer/Transforms/PruneCtrlRelations.cpp index 57324593a9c..b305456429b 100644 --- a/lib/Optimizer/Transforms/PruneCtrlRelations.cpp +++ b/lib/Optimizer/Transforms/PruneCtrlRelations.cpp @@ -60,7 +60,7 @@ class MakeControl : public OpRewritePattern { if (auto fromCtrl = cv.template getDefiningOp()) { input = fromCtrl.getCtrlbit(); } else { - input = rewriter.template create(loc, ctrlTy, cv); + input = quake::ToControlOp::create(rewriter, loc, ctrlTy, cv); } newCtrls.push_back(input); coarity--; @@ -72,9 +72,9 @@ class MakeControl : public OpRewritePattern { // Create a copy of `op` with the correct coarity and with the control wires // each now passing through a ToControlOp. SmallVector wireTys{coarity, wireTy}; - auto newOp = rewriter.create( - loc, wireTys, op.getIsAdjAttr(), op.getParameters(), newCtrls, - op.getTargets(), op.getNegatedQubitControlsAttr()); + auto newOp = OP::create(rewriter, loc, wireTys, op.getIsAdjAttr(), + op.getParameters(), newCtrls, op.getTargets(), + op.getNegatedQubitControlsAttr()); // Loop over the original controls again, this time adding a FromControlOp // so that the IR will type check when we replace the old op. 
@@ -82,8 +82,8 @@ class MakeControl : public OpRewritePattern { for (auto i : llvm::enumerate(op.getControls())) { auto cv = i.value(); if (cv.getType() == wireTy) { - Value fromCtrl = rewriter.template create( - loc, wireTy, newCtrls[i.index()]); + Value fromCtrl = quake::FromControlOp::create(rewriter, loc, wireTy, + newCtrls[i.index()]); op.getResult(i.index()).replaceAllUsesWith(fromCtrl); } else { op.getResult(i.index()).replaceAllUsesWith(newOp.getResult(newIdx++)); @@ -134,8 +134,8 @@ class PruneCtrlRelationsPass auto func = getOperation(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) { + if (failed( + applyPatternsGreedily(func.getOperation(), std::move(patterns)))) { signalPassFailure(); } } diff --git a/lib/Optimizer/Transforms/PySynthCallableBlockArgs.cpp b/lib/Optimizer/Transforms/PySynthCallableBlockArgs.cpp index 3b956e96eff..a81e787af61 100644 --- a/lib/Optimizer/Transforms/PySynthCallableBlockArgs.cpp +++ b/lib/Optimizer/Transforms/PySynthCallableBlockArgs.cpp @@ -8,15 +8,17 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Runtime.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" +namespace cudaq::opt { +#define GEN_PASS_DEF_PYSYNTHCALLABLEBLOCKARGS +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + using namespace mlir; namespace { @@ -126,13 +128,14 @@ class UpdateQuakeApplyOp : public OpConversionPattern { }; class PySynthCallableBlockArgs - : public cudaq::opt::PySynthCallableBlockArgsBase< + : public cudaq::opt::impl::PySynthCallableBlockArgsBase< PySynthCallableBlockArgs> { private: bool removeBlockArg = false; public: 
SmallVector names; + PySynthCallableBlockArgs() = default; PySynthCallableBlockArgs(const SmallVector &_names, bool remove) : removeBlockArg(remove), names(_names) {} @@ -191,7 +194,7 @@ class PySynthCallableBlockArgs if (isa(op.getArgument(argIndex).getType())) argsToErase.set(argIndex); - op.eraseArguments(argsToErase); + (void)op.eraseArguments(argsToErase); } } }; diff --git a/lib/Optimizer/Transforms/QuakePropagateMetadata.cpp b/lib/Optimizer/Transforms/QuakePropagateMetadata.cpp index a6db45dd7a8..2fca2ec772f 100644 --- a/lib/Optimizer/Transforms/QuakePropagateMetadata.cpp +++ b/lib/Optimizer/Transforms/QuakePropagateMetadata.cpp @@ -7,7 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Support/Debug.h" #include "mlir/Analysis/CallGraph.h" @@ -92,8 +91,8 @@ class QuakePropagateMetadataPass for (auto caller : callers) { LLVM_DEBUG(llvm::dbgs() << " Caller: " << caller.getName() << "\n\n"); - if (auto boolAttr = callee->getAttr("qubitMeasurementFeedback") - .dyn_cast_or_null()) { + if (auto boolAttr = dyn_cast_if_present( + callee->getAttr("qubitMeasurementFeedback"))) { if (boolAttr.getValue()) { LLVM_DEBUG(llvm::dbgs() << " Propagating qubitMeasurementFeedback attr: " diff --git a/lib/Optimizer/Transforms/QuakeSimplify.cpp b/lib/Optimizer/Transforms/QuakeSimplify.cpp index fcb46b1ab4f..57292f147db 100644 --- a/lib/Optimizer/Transforms/QuakeSimplify.cpp +++ b/lib/Optimizer/Transforms/QuakeSimplify.cpp @@ -7,7 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -278,10 +277,10 @@ class RotationCombine : public OpRewritePattern { return 
failure(); } if (qop.isAdj()) - p = rewriter.create(loc, ty, p); + p = arith::NegFOp::create(rewriter, loc, ty, p); if (prev.isAdj()) - pp = rewriter.create(loc, ty, pp); - newParams.push_back(rewriter.create(loc, ty, p, pp)); + pp = arith::NegFOp::create(rewriter, loc, ty, pp); + newParams.push_back(arith::AddFOp::create(rewriter, loc, ty, p, pp)); } // Combine the two rotations. @@ -551,7 +550,7 @@ class QuakeSimplifyPass RotationCombine, RotationCombine, RotationCombine, RotationCombine, RotationCombine>(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + if (failed(applyPatternsGreedily(op, std::move(patterns)))) signalPassFailure(); } }; diff --git a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp index 605e31e7511..8bf71b2db81 100644 --- a/lib/Optimizer/Transforms/QuakeSynthesizer.cpp +++ b/lib/Optimizer/Transforms/QuakeSynthesizer.cpp @@ -11,21 +11,21 @@ #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/CodeGen/QIROpaqueStructTypes.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Optimizer/Transforms/Passes.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Complex/IR/Complex.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Target/LLVMIR/TypeToLLVM.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +namespace cudaq::opt { +#define GEN_PASS_DEF_QUAKESYNTHESIZE +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + #define DEBUG_TYPE "quake-synthesizer" using namespace mlir; @@ -85,14 +85,14 @@ void 
synthesizeRuntimeArgument( template Value makeIntegerElement(OpBuilder &builder, Location argLoc, T val, IntegerType eleTy) { - return builder.create(argLoc, val, eleTy); + return arith::ConstantIntOp::create(builder, argLoc, eleTy, val); } template Value makeFloatElement(OpBuilder &builder, Location argLoc, T val, FloatType eleTy) { - return builder.create(argLoc, llvm::APFloat{val}, - eleTy); + return arith::ConstantFloatOp::create(builder, argLoc, eleTy, + llvm::APFloat{val}); } template @@ -102,7 +102,7 @@ Value makeComplexElement(OpBuilder &builder, Location argLoc, auto realPart = builder.getFloatAttr(eleTy, llvm::APFloat{val.real()}); auto imagPart = builder.getFloatAttr(eleTy, llvm::APFloat{val.imag()}); auto complexVal = builder.getArrayAttr({realPart, imagPart}); - return builder.create(argLoc, eleTy, complexVal); + return complex::ConstantOp::create(builder, argLoc, eleTy, complexVal); } /// returns true if and only if \p argument is used by a `quake.init_state` @@ -128,8 +128,9 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, auto eleTy = cast(strTy.getElementType()); builder.setInsertionPointToStart(argument.getOwner()); auto argLoc = argument.getLoc(); - auto conArray = builder.create( - argLoc, cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()), arrayAttr); + auto conArray = cudaq::cc::ConstantArrayOp::create( + builder, argLoc, cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()), + arrayAttr); auto arrTy = cudaq::cc::ArrayType::get(ctx, eleTy, vec.size()); std::optional arrayInMemory; auto ptrEleTy = cudaq::cc::PointerType::get(eleTy); @@ -150,17 +151,17 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, irBuilder.genVectorOfConstants(argLoc, module, symbol, vec); builder.setInsertionPointToStart(argument.getOwner()); - buffer = builder.create( - argLoc, cudaq::cc::PointerType::get(arrTy), symbol); + buffer = cudaq::cc::AddressOfOp::create( + builder, argLoc, 
cudaq::cc::PointerType::get(arrTy), symbol); } else { builder.setInsertionPointAfter(conArray); - buffer = builder.create(argLoc, arrTy); - builder.create(argLoc, conArray, buffer); + buffer = cudaq::cc::AllocaOp::create(builder, argLoc, arrTy); + cudaq::cc::StoreOp::create(builder, argLoc, conArray, buffer); } auto ptrArrEleTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(eleTy)); - Value res = builder.create(argLoc, ptrArrEleTy, buffer); + Value res = cudaq::cc::CastOp::create(builder, argLoc, ptrArrEleTy, buffer); arrayInMemory = res; return res; }; @@ -182,8 +183,8 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, // Handle the StdvecSize use case. // Replace a `vec.size()` with the length, which is a synthesized constant. if (auto stdvecSizeOp = dyn_cast(argUser)) { - Value length = builder.create( - argLoc, vec.size(), stdvecSizeOp.getType()); + Value length = arith::ConstantIntOp::create( + builder, argLoc, stdvecSizeOp.getType(), vec.size()); stdvecSizeOp.replaceAllUsesWith(length); continue; } @@ -214,14 +215,15 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, if (index == cudaq::cc::ComputePtrOp::kDynamicIndex) { OpBuilder::InsertionGuard guard(builder); builder.setInsertionPoint(elePtrOp); - Value getEle = builder.create( - elePtrOp.getLoc(), eleTy, conArray, + Value getEle = cudaq::cc::ExtractValueOp::create( + builder, elePtrOp.getLoc(), eleTy, conArray, elePtrOp.getDynamicIndices()[0]); if (failed(replaceLoads(elePtrOp, getEle))) { Value memArr = getArrayInMemory(); builder.setInsertionPoint(elePtrOp); - Value newComputedPtr = builder.create( - argLoc, ptrEleTy, memArr, elePtrOp.getDynamicIndices()[0]); + Value newComputedPtr = cudaq::cc::ComputePtrOp::create( + builder, argLoc, ptrEleTy, memArr, + elePtrOp.getDynamicIndices()[0]); elePtrOp.replaceAllUsesWith(newComputedPtr); } continue; @@ -232,8 +234,8 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, 
unsigned &counter, Value memArr = getArrayInMemory(); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPoint(elePtrOp); - Value newComputedPtr = builder.create( - argLoc, ptrEleTy, memArr, + Value newComputedPtr = cudaq::cc::ComputePtrOp::create( + builder, argLoc, ptrEleTy, memArr, SmallVector{0, index}); elePtrOp.replaceAllUsesWith(newComputedPtr); } @@ -259,9 +261,9 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, Value memArr = getArrayInMemory(); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointAfter(memArr.getDefiningOp()); - Value size = builder.create(argLoc, vec.size(), 64); + Value size = arith::ConstantIntOp::create(builder, argLoc, vec.size(), 64); Value newVec = - builder.create(argLoc, strTy, memArr, size); + cudaq::cc::StdvecInitOp::create(builder, argLoc, strTy, memArr, size); argument.replaceAllUsesWith(newVec); } return success(); @@ -376,7 +378,7 @@ synthesizeVectorArgument(OpBuilder &builder, ModuleOp module, unsigned &counter, namespace { class QuakeSynthesizer - : public cudaq::opt::QuakeSynthesizeBase { + : public cudaq::opt::impl::QuakeSynthesizeBase { protected: // The name of the kernel to be synthesized std::string kernelName; @@ -472,35 +474,38 @@ class QuakeSynthesizer synthesizeRuntimeArgument( builder, argument, args, offset, sizeof(bool), [=](OpBuilder &builder, bool *concrete) { - return builder.create(loc, *concrete, 1); + return arith::ConstantIntOp::create(builder, loc, *concrete, 1); }); break; case 8: synthesizeRuntimeArgument( builder, argument, args, offset, sizeof(std::uint8_t), [=](OpBuilder &builder, std::uint8_t *concrete) { - return builder.create(loc, *concrete, 8); + return arith::ConstantIntOp::create(builder, loc, *concrete, 8); }); break; case 16: synthesizeRuntimeArgument( builder, argument, args, offset, sizeof(std::int16_t), [=](OpBuilder &builder, std::int16_t *concrete) { - return builder.create(loc, *concrete, 16); + return 
arith::ConstantIntOp::create(builder, loc, *concrete, + 16); }); break; case 32: synthesizeRuntimeArgument( builder, argument, args, offset, sizeof(std::int32_t), [=](OpBuilder &builder, std::int32_t *concrete) { - return builder.create(loc, *concrete, 32); + return arith::ConstantIntOp::create(builder, loc, *concrete, + 32); }); break; case 64: synthesizeRuntimeArgument( builder, argument, args, offset, sizeof(std::int64_t), [=](OpBuilder &builder, std::int64_t *concrete) { - return builder.create(loc, *concrete, 64); + return arith::ConstantIntOp::create(builder, loc, *concrete, + 64); }); break; default: @@ -516,22 +521,24 @@ class QuakeSynthesizer synthesizeRuntimeArgument( builder, argument, args, offset, cudaq::opt::convertBitsToBytes(type.getIntOrFloatBitWidth()), - [=](OpBuilder &builder, float *concrete) { - llvm::APFloat f(*concrete); - return builder.create( - loc, f, builder.getF32Type()); - }); + std::function( + [=](OpBuilder &builder, float *concrete) -> Value { + llvm::APFloat f(*concrete); + return arith::ConstantFloatOp::create( + builder, loc, builder.getF32Type(), f); + })); continue; } if (type == builder.getF64Type()) { synthesizeRuntimeArgument( builder, argument, args, offset, cudaq::opt::convertBitsToBytes(type.getIntOrFloatBitWidth()), - [=](OpBuilder &builder, double *concrete) { - llvm::APFloat f(*concrete); - return builder.create( - loc, f, builder.getF64Type()); - }); + std::function( + [=](OpBuilder &builder, double *concrete) -> Value { + llvm::APFloat f(*concrete); + return arith::ConstantFloatOp::create( + builder, loc, builder.getF64Type(), f); + })); continue; } @@ -544,12 +551,13 @@ class QuakeSynthesizer synthesizeRuntimeArgument( builder, argument, args, offset, sizeof(void *), [=](OpBuilder &builder, cudaq::state **concrete) { - Value rawPtr = builder.create( - loc, reinterpret_cast(*concrete), + Value rawPtr = arith::ConstantIntOp::create( + builder, loc, reinterpret_cast(*concrete), sizeof(void *) * 8); auto stateTy = 
quake::StateType::get(builder.getContext()); - return builder.create( - loc, cudaq::cc::PointerType::get(stateTy), rawPtr); + return cudaq::cc::CastOp::create( + builder, loc, cudaq::cc::PointerType::get(stateTy), + rawPtr); }); continue; } else { @@ -699,30 +707,30 @@ class QuakeSynthesizer // that can be used in, say, a Pauli op. auto ptrTy = cudaq::cc::PointerType::get(charSpanTy); auto loc = arguments[idx].getLoc(); - auto ns = builder.create(loc, numberSpans, 64); - auto aos = builder.create(loc, charSpanTy, ns); + auto ns = arith::ConstantIntOp::create(builder, loc, numberSpans, 64); + auto aos = cudaq::cc::AllocaOp::create(builder, loc, charSpanTy, ns); auto pi8Ty = cudaq::cc::PointerType::get(charSpanTy.getElementType()); cudaq::IRBuilder irBuilder(module); for (decltype(numberSpans) i = 0; i < numberSpans; ++i) { std::size_t length = spanSizes[i]; - auto strLen = builder.create(loc, length, 64); + auto strLen = arith::ConstantIntOp::create(builder, loc, length, 64); StringRef strData{bufferAppendix, length}; auto global = irBuilder.genCStringLiteralAppendNul(loc, module, strData); - auto addr = builder.create( - loc, cudaq::cc::PointerType::get(global.getType()), + auto addr = cudaq::cc::AddressOfOp::create( + builder, loc, cudaq::cc::PointerType::get(global.getType()), global.getName()); - auto str = builder.create(loc, pi8Ty, addr); - auto spanp = builder.create( - loc, ptrTy, aos, + auto str = cudaq::cc::CastOp::create(builder, loc, pi8Ty, addr); + auto spanp = cudaq::cc::ComputePtrOp::create( + builder, loc, ptrTy, aos, ArrayRef{static_cast(i)}); - auto spanData = builder.create( - loc, charSpanTy, str, strLen); - builder.create(loc, spanData, spanp); + auto spanData = cudaq::cc::StdvecInitOp::create( + builder, loc, charSpanTy, str, strLen); + cudaq::cc::StoreOp::create(builder, loc, spanData, spanp); bufferAppendix += length; } auto svTy = cudaq::cc::StdvecType::get(charSpanTy); - auto ics = builder.create(loc, svTy, aos, ns); + auto ics = 
cudaq::cc::StdvecInitOp::create(builder, loc, svTy, aos, ns); arguments[idx].replaceAllUsesWith(ics); continue; } @@ -747,7 +755,11 @@ class QuakeSynthesizer return; } } - funcOp.eraseArguments(argsToErase); + + // FIXME: erasing the arguments like this breaks the semantics of the code + // and is a bad idea in general. This practice is HIGHLY DISCOURAGED. + if (failed(funcOp.eraseArguments(argsToErase))) + funcOp->emitWarning("could not erase arguments"); } }; diff --git a/lib/Optimizer/Transforms/RefToVeqAlloc.cpp b/lib/Optimizer/Transforms/RefToVeqAlloc.cpp index 4c5f3aa153d..fb71b65cb89 100644 --- a/lib/Optimizer/Transforms/RefToVeqAlloc.cpp +++ b/lib/Optimizer/Transforms/RefToVeqAlloc.cpp @@ -7,7 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -32,7 +31,7 @@ struct AllocaPat : public OpRewritePattern { PatternRewriter &rewriter) const override { if (isa(alloc.getType())) return failure(); - Value newAlloc = rewriter.create(alloc.getLoc(), 1u); + Value newAlloc = quake::AllocaOp::create(rewriter, alloc.getLoc(), 1u); rewriter.replaceOpWithNewOp(alloc, newAlloc, 0u); return success(); } @@ -49,7 +48,7 @@ class PromoteRefToVeqAllocPass auto *ctx = &getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) { + if (failed(applyPatternsGreedily(op, std::move(patterns)))) { op->emitOpError("could not promote allocations"); signalPassFailure(); } diff --git a/lib/Optimizer/Transforms/RegToMem.cpp b/lib/Optimizer/Transforms/RegToMem.cpp index 4d31b57b504..50a0b2eefdf 100644 --- a/lib/Optimizer/Transforms/RegToMem.cpp +++ b/lib/Optimizer/Transforms/RegToMem.cpp @@ -66,8 +66,7 @@ struct RegToMemAnalysis { unsigned getCardinality() const 
{ return cardinality; } std::optional idFromValue(Value v) const { - auto iter = eqClasses.findValue(toOpaque(v)); - if (iter == eqClasses.end()) + if (!eqClasses.contains(toOpaque(v))) return std::nullopt; return setIds.find(eqClasses.getLeaderValue(toOpaque(v)))->second; } @@ -87,10 +86,10 @@ struct RegToMemAnalysis { auto *term = pred->getTerminator(); auto i = successorIndex(term, block); Value u = cast(term).getSuccessorOperands(i)[argNum]; - if (eqClasses.findValue(toOpaque(u)) == eqClasses.end()) - insertToEqClass(u, v); - else + if (eqClasses.contains(toOpaque(u))) eqClasses.unionSets(toOpaque(v), toOpaque(u)); + else + insertToEqClass(u, v); } } } @@ -249,8 +248,8 @@ struct RegToMemAnalysis { } unsigned id = 0; for (auto i = eqClasses.begin(), end = eqClasses.end(); i != end; ++i) - if (i->isLeader()) { - void *leader = const_cast(*eqClasses.findLeader(i)); + if ((*i)->isLeader()) { + void *leader = const_cast(*eqClasses.findLeader(**i)); setIds.insert(std::make_pair(leader, id++)); } } @@ -258,10 +257,11 @@ struct RegToMemAnalysis { // For debugging purposes. 
void dump() const { for (auto i = eqClasses.begin(); i != eqClasses.end(); ++i) { - if (!i->isLeader()) + if (!(*i)->isLeader()) continue; llvm::errs() << "Set {\n"; - for (auto e = eqClasses.member_begin(i); e != eqClasses.member_end(); ++e) + for (auto e = eqClasses.member_begin(**i); e != eqClasses.member_end(); + ++e) llvm::errs() << " " << Value::getFromOpaquePointer(*e) << '\n'; llvm::errs() << "}\n"; } @@ -309,15 +309,16 @@ class CollapseWrappers : public OpRewritePattern { auto args = collect(op.getOperands()); auto nameAttr = op.getRegisterNameAttr(); eraseWrapUsers(op); - auto newOp = rewriter.create( - loc, ArrayRef{op.getMeasOut().getType()}, args, nameAttr); + auto newOp = + OP::create(rewriter, loc, ArrayRef{op.getMeasOut().getType()}, + args, nameAttr); op.getResult(0).replaceAllUsesWith(newOp.getResult(0)); rewriter.eraseOp(op); } else if constexpr (std::is_same_v) { // Reset is a special case. auto targ = findLookupValue(op.getTargets()); eraseWrapUsers(op); - rewriter.create(loc, TypeRange{}, targ); + quake::ResetOp::create(rewriter, loc, TypeRange{}, targ); rewriter.eraseOp(op); } else if constexpr (std::is_same_v) { auto targ = findLookupValue(op.getTarget()); @@ -328,8 +329,8 @@ class CollapseWrappers : public OpRewritePattern { auto ctrls = collect(op.getControls()); auto targs = collect(op.getTargets()); eraseWrapUsers(op); - rewriter.create(loc, op.getIsAdj(), op.getParameters(), ctrls, targs, - op.getNegatedQubitControlsAttr()); + OP::create(rewriter, loc, op.getIsAdj(), op.getParameters(), ctrls, targs, + op.getNegatedQubitControlsAttr()); rewriter.eraseOp(op); } return success(); @@ -381,8 +382,8 @@ struct EraseWiresCondBranch : public OpRewritePattern { newFalseOperands.push_back(v); } rewriter.replaceOpWithNewOp( - branch, branch.getCondition(), newTrueOperands, newFalseOperands, - branch.getTrueDest(), branch.getFalseDest()); + branch, branch.getCondition(), branch.getTrueDest(), newTrueOperands, + branch.getFalseDest(), 
newFalseOperands); return success(); } BlockSet &blocks; @@ -411,8 +412,8 @@ struct EraseWiresIf : public OpRewritePattern { newIfTy.push_back(ty); auto origThenArgs = ifOp.getThenRegion().front().getArguments(); auto origElseArgs = ifOp.getElseRegion().front().getArguments(); - auto newIf = rewriter.create( - ifOp.getLoc(), newIfTy, ifOp.getCondition(), + auto newIf = cudaq::cc::IfOp::create( + rewriter, ifOp.getLoc(), newIfTy, ifOp.getCondition(), [&](OpBuilder &, Location, Region ®ion) { rewriter.inlineRegionBefore(ifOp.getThenRegion(), region, region.end()); @@ -433,8 +434,8 @@ struct EraseWiresIf : public OpRewritePattern { for (auto [arg, from] : llvm::zip(entry.getArguments(), origArgs)) { auto id = analysis.idFromValue(from); assert(id); - auto unwrap = builder.create(ifOp.getLoc(), wireTy, - allocas[*id]); + auto unwrap = quake::UnwrapOp::create(builder, ifOp.getLoc(), wireTy, + allocas[*id]); arg.replaceAllUsesWith(unwrap); } } @@ -447,7 +448,7 @@ struct EraseWiresIf : public OpRewritePattern { for (auto v : cont.getOperands()) if (!quake::isLinearType(v.getType())) newOpnds.push_back(v); - builder.create(cont.getLoc(), newOpnds); + cudaq::cc::ContinueOp::create(builder, cont.getLoc(), newOpnds); rewriter.eraseOp(cont); } }; @@ -462,8 +463,8 @@ struct EraseWiresIf : public OpRewritePattern { if (quake::isLinearType(v.getType())) { auto id = analysis.idFromValue(v); assert(id); - auto unwrap = rewriter.create(ifOp.getLoc(), wireTy, - allocas[*id]); + auto unwrap = quake::UnwrapOp::create(rewriter, ifOp.getLoc(), wireTy, + allocas[*id]); unwraps.push_back(unwrap); } else { unwraps.push_back(newIf.getResult(i++)); @@ -511,7 +512,7 @@ class RegToMemPass : public cudaq::opt::impl::RegToMemBase { builder.setInsertionPoint(nwire); auto qrefTy = quake::RefType::get(ctx); Value a = - builder.create(nwire->getLoc(), qrefTy, Value{}); + quake::AllocaOp::create(builder, nwire->getLoc(), qrefTy, Value{}); if (fromWire) borrowAllocas.push_back(a); if (auto opt = 
analysis.idFromValue(nwire->getResult(0))) { @@ -575,7 +576,7 @@ class RegToMemPass : public cudaq::opt::impl::RegToMemBase { if (isa(op) && !borrowAllocas.empty()) { OpBuilder builder(op); for (auto v : borrowAllocas) - builder.create(func.getLoc(), v); + quake::DeallocOp::create(builder, func.getLoc(), v); } return WalkResult::advance(); }); diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 3b7c4f30d08..f8680220eb6 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -8,11 +8,7 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Complex/IR/Complex.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -124,8 +120,7 @@ class ReplaceStateWithKernelPass LLVM_DEBUG(llvm::dbgs() << "Before replace state with kernel: " << func << '\n'); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) + if (failed(applyPatternsGreedily(func.getOperation(), std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() diff --git a/lib/Optimizer/Transforms/ResetBeforeReuse.cpp b/lib/Optimizer/Transforms/ResetBeforeReuse.cpp index 65f3e5e7d4a..ebb70a80f32 100644 --- a/lib/Optimizer/Transforms/ResetBeforeReuse.cpp +++ b/lib/Optimizer/Transforms/ResetBeforeReuse.cpp @@ -8,16 +8,10 @@ #include "PassDetails.h" #include "cudaq/Optimizer/CodeGen/Emitter.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Todo.h" #include "llvm/Support/Debug.h" -#include 
"mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dominance.h" -#include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -125,7 +119,7 @@ class ResetAfterMeasurePattern : public OpRewritePattern { // Insert reset Location loc = mz->getLoc(); rewriter.setInsertionPointAfter(mz); - rewriter.create(loc, TypeRange{}, measuredQubit); + quake::ResetOp::create(rewriter, loc, TypeRange{}, measuredQubit); // Insert a conditional X to initialize qubit after reset. auto measOut = mz.getMeasOut(); mlir::Value measBit = [&]() { @@ -137,19 +131,19 @@ class ResetAfterMeasurePattern : public OpRewritePattern { } } // No discriminate exists - create the discriminate Op - auto discOp = rewriter.create( - loc, rewriter.getI1Type(), measOut); + auto discOp = quake::DiscriminateOp::create( + rewriter, loc, rewriter.getI1Type(), measOut); return discOp.getResult(); }(); - rewriter.create( - loc, TypeRange{}, measBit, + cudaq::cc::IfOp::create( + rewriter, loc, TypeRange{}, measBit, [&](OpBuilder &opBuilder, Location location, Region ®ion) { region.push_back(new Block{}); auto &bodyBlock = region.front(); OpBuilder::InsertionGuard guad(opBuilder); opBuilder.setInsertionPointToStart(&bodyBlock); - opBuilder.create(location, measuredQubit); - opBuilder.create(location); + quake::XOp::create(opBuilder, location, measuredQubit); + cudaq::cc::ContinueOp::create(opBuilder, location); }); modified = true; } else { @@ -190,7 +184,7 @@ class ResetAfterMeasurePattern : public OpRewritePattern { if (v.value() != extractOp) { // This is another extract. 
auto nextExtractOp = - dyn_cast_or_null(v.value()); + dyn_cast_if_present(v.value()); if (nextExtractOp) { std::optional nextIndex = nextExtractOp.hasConstantIndex() @@ -239,8 +233,8 @@ class QubitResetBeforeReusePass RegUseTracker tracker(funcOp); RewritePatternSet patterns(ctx); patterns.insert(ctx, tracker); - if (failed(applyPatternsAndFoldGreedily(funcOp.getOperation(), - std::move(patterns)))) { + if (failed(applyPatternsGreedily(funcOp.getOperation(), + std::move(patterns)))) { funcOp.emitOpError("Adding qubit reset before reuse pass failed"); signalPassFailure(); } diff --git a/lib/Optimizer/Transforms/ResourceCount.cpp b/lib/Optimizer/Transforms/ResourceCount.cpp index ed6ce573c67..8d553399c23 100644 --- a/lib/Optimizer/Transforms/ResourceCount.cpp +++ b/lib/Optimizer/Transforms/ResourceCount.cpp @@ -7,9 +7,8 @@ ******************************************************************************/ #include "cudaq/Optimizer/Transforms/ResourceCount.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "PassDetails.h" #include "cudaq/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Transforms/Passes.h" using namespace mlir; diff --git a/lib/Optimizer/Transforms/ResourceCountPreprocess.cpp b/lib/Optimizer/Transforms/ResourceCountPreprocess.cpp index cafec122895..e9b12a8e4c0 100644 --- a/lib/Optimizer/Transforms/ResourceCountPreprocess.cpp +++ b/lib/Optimizer/Transforms/ResourceCountPreprocess.cpp @@ -10,27 +10,21 @@ #include "PassDetails.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" #include "cudaq/Optimizer/Builder/Factory.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/IR/IRMapping.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" -#define 
DEBUG_TYPE "resource-count-preprocess" - -using namespace mlir; - -//===----------------------------------------------------------------------===// -// Generated logic -//===----------------------------------------------------------------------===// namespace cudaq::opt { #define GEN_PASS_DEF_RESOURCECOUNTPREPROCESS #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt +#define DEBUG_TYPE "resource-count-preprocess" + +using namespace mlir; + struct ResourceCountPreprocessPass : public cudaq::opt::impl::ResourceCountPreprocessBase< ResourceCountPreprocessPass> { diff --git a/lib/Optimizer/Transforms/SROA.cpp b/lib/Optimizer/Transforms/SROA.cpp index a2b48db86d5..e8017e9c412 100644 --- a/lib/Optimizer/Transforms/SROA.cpp +++ b/lib/Optimizer/Transforms/SROA.cpp @@ -7,8 +7,6 @@ ******************************************************************************/ #include "PassDetails.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -74,12 +72,12 @@ class AllocaAggregate : public OpRewritePattern { if (auto strTy = dyn_cast(allocOp.getElementType())) { for (auto mTy : strTy.getMembers()) - scalars.push_back(rewriter.create(loc, mTy)); + scalars.push_back(cudaq::cc::AllocaOp::create(rewriter, loc, mTy)); } else if (auto arrTy = dyn_cast(allocOp.getElementType())) { Type vTy = arrTy.getElementType(); for (cudaq::cc::ArrayType::SizeType i = 0; i < arrTy.getSize(); ++i) - scalars.push_back(rewriter.create(loc, vTy)); + scalars.push_back(cudaq::cc::AllocaOp::create(rewriter, loc, vTy)); } // Replace the cc.compute_ptr ops with forwarding. 
@@ -100,19 +98,21 @@ class AllocaAggregate : public OpRewritePattern { rewriter.setInsertionPoint(loadOp); auto loadTy = loadOp.getType(); auto loc = loadOp.getLoc(); - Value result = rewriter.create(loc, loadTy); + Value result = cudaq::cc::UndefOp::create(rewriter, loc, loadTy); if (auto strTy = dyn_cast(loadTy)) { for (auto [i, mTy] : llvm::enumerate(strTy.getMembers())) { - Value loadEle = rewriter.create(loc, scalars[i]); - result = rewriter.create( - loc, loadTy, result, loadEle, i); + Value loadEle = + cudaq::cc::LoadOp::create(rewriter, loc, scalars[i]); + result = cudaq::cc::InsertValueOp::create(rewriter, loc, loadTy, + result, loadEle, i); } } else { auto arrTy = cast(loadTy); for (cudaq::cc::ArrayType::SizeType i = 0; i < arrTy.getSize(); ++i) { - Value loadEle = rewriter.create(loc, scalars[i]); - result = rewriter.create( - loc, loadTy, result, loadEle, i); + Value loadEle = + cudaq::cc::LoadOp::create(rewriter, loc, scalars[i]); + result = cudaq::cc::InsertValueOp::create(rewriter, loc, loadTy, + result, loadEle, i); } } updates.emplace_back(loadOp, result); @@ -211,8 +211,8 @@ class StoreAggregate : public OpRewritePattern { auto loc = insVal.getLoc(); auto vTy = cudaq::cc::PointerType::get(v.getType()); auto toAddr = - rewriter.create(loc, vTy, dest, args); - rewriter.create(loc, v, toAddr); + cudaq::cc::ComputePtrOp::create(rewriter, loc, vTy, dest, args); + cudaq::cc::StoreOp::create(rewriter, loc, v, toAddr); } LLVM_DEBUG(llvm::dbgs() << "updated: " << storeOp << '\n'); rewriter.eraseOp(storeOp); @@ -230,7 +230,7 @@ class SROAPass : public cudaq::opt::impl::SROABase { LLVM_DEBUG(llvm::dbgs() << "Before SROA:\n" << *op << '\n'); RewritePatternSet patterns(ctx); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) { + if (failed(applyPatternsGreedily(op, std::move(patterns)))) { signalPassFailure(); return; } diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp 
b/lib/Optimizer/Transforms/StatePreparation.cpp index 5cdca277dc6..07dc1a53025 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -8,13 +8,9 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Factory.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "llvm/Support/Debug.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" @@ -163,13 +159,13 @@ class StateGateBuilder { void applyRotationOp(double theta, std::size_t target) { auto qubit = createQubitRef(target); auto thetaValue = createAngleValue(theta); - rewriter.create(loc, thetaValue, mlir::ValueRange{}, qubit); + Op::create(rewriter, loc, thetaValue, mlir::ValueRange{}, qubit); }; void applyX(std::size_t control, std::size_t target) { auto qubitC = createQubitRef(control); auto qubitT = createQubitRef(target); - rewriter.create(loc, qubitC, qubitT); + quake::XOp::create(rewriter, loc, qubitC, qubitT); }; private: @@ -177,14 +173,14 @@ class StateGateBuilder { if (qubitRefs.contains(index)) return qubitRefs[index]; - auto ref = rewriter.create(loc, qubits, index); + auto ref = quake::ExtractRefOp::create(rewriter, loc, qubits, index); qubitRefs[index] = ref; return ref; } mlir::Value createAngleValue(double angle) { - return rewriter.create( - loc, llvm::APFloat{angle}, rewriter.getF64Type()); + return arith::ConstantFloatOp::create(rewriter, loc, rewriter.getF64Type(), + llvm::APFloat{angle}); } PatternRewriter &rewriter; @@ -451,7 +447,7 @@ class StatePreparationPass patterns.insert(ctx, phaseThreshold); patterns.insert(ctx); - if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns)))) { + if 
(failed(applyPatternsGreedily(func, std::move(patterns)))) { func.emitOpError("State preparation failed"); signalPassFailure(); } diff --git a/lib/Optimizer/Transforms/UnitarySynthesis.cpp b/lib/Optimizer/Transforms/UnitarySynthesis.cpp index 590e6a61c0e..baab8de9b9e 100644 --- a/lib/Optimizer/Transforms/UnitarySynthesis.cpp +++ b/lib/Optimizer/Transforms/UnitarySynthesis.cpp @@ -10,12 +10,9 @@ #include "common/EigenDense.h" #include "cudaq/Optimizer/Builder/Factory.h" #include "cudaq/Optimizer/CodeGen/Passes.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "llvm/Support/Debug.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -31,7 +28,6 @@ namespace cudaq::opt { #define DEBUG_TYPE "unitary-synthesis" using namespace mlir; -using namespace std::complex_literals; namespace { @@ -80,6 +76,7 @@ struct OneQubitOpZYZ : public Decomposer { /// corresponding explanation in https://threeplusone.com/pubs/on_gates.pdf, /// Section 4. void decompose() override { + using namespace std::complex_literals; /// Rescale the input unitary matrix, `u`, to be special unitary. 
/// Extract a phase factor, `phase`, so that /// `determinant(inverse_phase * unitary) = 1` @@ -110,8 +107,8 @@ struct OneQubitOpZYZ : public Decomposer { FunctionType::get(parentModule.getContext(), targets[0].getType(), {}); auto insPt = rewriter.saveInsertionPoint(); rewriter.setInsertionPointToStart(parentModule.getBody()); - auto func = - rewriter.create(parentModule->getLoc(), funcName, funcTy); + auto func = func::FuncOp::create(rewriter, parentModule->getLoc(), funcName, + funcTy); func.setPrivate(); auto *block = func.addEntryBlock(); rewriter.setInsertionPointToStart(block); @@ -123,17 +120,17 @@ struct OneQubitOpZYZ : public Decomposer { if (isAboveThreshold(angles.gamma)) { auto gamma = cudaq::opt::factory::createFloatConstant( loc, rewriter, angles.gamma, floatTy); - rewriter.create(loc, gamma, ValueRange{}, arguments); + quake::RzOp::create(rewriter, loc, gamma, ValueRange{}, arguments); } if (isAboveThreshold(angles.beta)) { auto beta = cudaq::opt::factory::createFloatConstant( loc, rewriter, angles.beta, floatTy); - rewriter.create(loc, beta, ValueRange{}, arguments); + quake::RyOp::create(rewriter, loc, beta, ValueRange{}, arguments); } if (isAboveThreshold(angles.alpha)) { auto alpha = cudaq::opt::factory::createFloatConstant( loc, rewriter, angles.alpha, floatTy); - rewriter.create(loc, alpha, ValueRange{}, arguments); + quake::RzOp::create(rewriter, loc, alpha, ValueRange{}, arguments); } /// NOTE: Typically global phase can be ignored but, if this decomposition /// is applied in a kernel that is called with `cudaq::control`, the global @@ -145,11 +142,11 @@ struct OneQubitOpZYZ : public Decomposer { if (isAboveThreshold(globalPhase)) { auto phase = cudaq::opt::factory::createFloatConstant( loc, rewriter, globalPhase, floatTy); - Value negPhase = rewriter.create(loc, phase); - rewriter.create(loc, phase, ValueRange{}, arguments[0]); - rewriter.create(loc, negPhase, ValueRange{}, arguments[0]); + Value negPhase = arith::NegFOp::create(rewriter, 
loc, phase); + quake::R1Op::create(rewriter, loc, phase, ValueRange{}, arguments[0]); + quake::RzOp::create(rewriter, loc, negPhase, ValueRange{}, arguments[0]); } - rewriter.create(loc); + func::ReturnOp::create(rewriter, loc); rewriter.restoreInsertionPoint(insPt); } @@ -180,6 +177,7 @@ struct KAKComponents { /// 0 i −1 0 /// 1 0 0 −i const Eigen::Matrix4cd &MagicBasisMatrix() { + using namespace std::complex_literals; static Eigen::Matrix4cd MagicBasisMatrix; MagicBasisMatrix << 1.0, 0.0, 0.0, 1i, 0.0, 1i, 1.0, 0, 0, 1i, -1.0, 0, 1.0, 0, 0, -1i; @@ -278,6 +276,7 @@ extractSU2FromSO4(const Eigen::Matrix4cd &matrix) { /// Compute exp(i(x XX + y YY + z ZZ)) matrix for verification Eigen::Matrix4cd canonicalVecToMatrix(double x, double y, double z) { + using namespace std::complex_literals; Eigen::Matrix2cd X{Eigen::Matrix2cd::Zero()}; Eigen::Matrix2cd Y{Eigen::Matrix2cd::Zero()}; Eigen::Matrix2cd Z{Eigen::Matrix2cd::Zero()}; @@ -300,6 +299,7 @@ struct TwoQubitOpKAK : public Decomposer { /// Ref: https://arxiv.org/pdf/quant-ph/0507171 /// Ref: https://arxiv.org/pdf/0806.4015 void decompose() override { + using namespace std::complex_literals; /// Step0: Convert to special unitary phase = std::pow(targetMatrix.determinant(), 0.25); auto specialUnitary = targetMatrix / phase; @@ -355,8 +355,8 @@ struct TwoQubitOpKAK : public Decomposer { FunctionType::get(parentModule.getContext(), targets.getTypes(), {}); auto insPt = rewriter.saveInsertionPoint(); rewriter.setInsertionPointToStart(parentModule.getBody()); - auto func = - rewriter.create(parentModule->getLoc(), funcName, funcTy); + auto func = func::FuncOp::create(rewriter, parentModule->getLoc(), funcName, + funcTy); func.setPrivate(); auto *block = func.addEntryBlock(); rewriter.setInsertionPointToStart(block); @@ -364,67 +364,67 @@ struct TwoQubitOpKAK : public Decomposer { FloatType floatTy = rewriter.getF64Type(); /// NOTE: Operator notation is right-to-left, whereas circuit notation is /// left-to-right. 
Hence, operations are applied in reverse order. - rewriter.create( - loc, TypeRange{}, + quake::ApplyOp::create( + rewriter, loc, TypeRange{}, SymbolRefAttr::get(rewriter.getContext(), funcName + "b0"), false, ValueRange{}, ValueRange{arguments[1]}); - rewriter.create( - loc, TypeRange{}, + quake::ApplyOp::create( + rewriter, loc, TypeRange{}, SymbolRefAttr::get(rewriter.getContext(), funcName + "b1"), false, ValueRange{}, ValueRange{arguments[0]}); /// TODO: Refactor to use a transformation pass for `quake.exp_pauli` /// XX if (isAboveThreshold(components.x)) { - rewriter.create(loc, arguments[0]); - rewriter.create(loc, arguments[1]); - rewriter.create(loc, arguments[1], arguments[0]); + quake::HOp::create(rewriter, loc, arguments[0]); + quake::HOp::create(rewriter, loc, arguments[1]); + quake::XOp::create(rewriter, loc, arguments[1], arguments[0]); auto xAngle = cudaq::opt::factory::createFloatConstant( loc, rewriter, -2.0 * components.x, floatTy); - rewriter.create(loc, xAngle, ValueRange{}, arguments[0]); - rewriter.create(loc, arguments[1], arguments[0]); - rewriter.create(loc, arguments[1]); - rewriter.create(loc, arguments[0]); + quake::RzOp::create(rewriter, loc, xAngle, ValueRange{}, arguments[0]); + quake::XOp::create(rewriter, loc, arguments[1], arguments[0]); + quake::HOp::create(rewriter, loc, arguments[1]); + quake::HOp::create(rewriter, loc, arguments[0]); } /// YY if (isAboveThreshold(components.y)) { auto piBy2 = cudaq::opt::factory::createFloatConstant(loc, rewriter, M_PI_2, floatTy); - rewriter.create(loc, piBy2, ValueRange{}, arguments[0]); - rewriter.create(loc, piBy2, ValueRange{}, arguments[1]); - rewriter.create(loc, arguments[1], arguments[0]); + quake::RxOp::create(rewriter, loc, piBy2, ValueRange{}, arguments[0]); + quake::RxOp::create(rewriter, loc, piBy2, ValueRange{}, arguments[1]); + quake::XOp::create(rewriter, loc, arguments[1], arguments[0]); auto yAngle = cudaq::opt::factory::createFloatConstant( loc, rewriter, -2.0 * 
components.y, floatTy); - rewriter.create(loc, yAngle, ValueRange{}, arguments[0]); - rewriter.create(loc, arguments[1], arguments[0]); - Value negPiBy2 = rewriter.create(loc, piBy2); - rewriter.create(loc, negPiBy2, ValueRange{}, arguments[1]); - rewriter.create(loc, negPiBy2, ValueRange{}, arguments[0]); + quake::RzOp::create(rewriter, loc, yAngle, ValueRange{}, arguments[0]); + quake::XOp::create(rewriter, loc, arguments[1], arguments[0]); + Value negPiBy2 = arith::NegFOp::create(rewriter, loc, piBy2); + quake::RxOp::create(rewriter, loc, negPiBy2, ValueRange{}, arguments[1]); + quake::RxOp::create(rewriter, loc, negPiBy2, ValueRange{}, arguments[0]); } /// ZZ if (isAboveThreshold(components.z)) { - rewriter.create(loc, arguments[1], arguments[0]); + quake::XOp::create(rewriter, loc, arguments[1], arguments[0]); auto zAngle = cudaq::opt::factory::createFloatConstant( loc, rewriter, -2.0 * components.z, floatTy); - rewriter.create(loc, zAngle, ValueRange{}, arguments[0]); - rewriter.create(loc, arguments[1], arguments[0]); + quake::RzOp::create(rewriter, loc, zAngle, ValueRange{}, arguments[0]); + quake::XOp::create(rewriter, loc, arguments[1], arguments[0]); } - rewriter.create( - loc, TypeRange{}, + quake::ApplyOp::create( + rewriter, loc, TypeRange{}, SymbolRefAttr::get(rewriter.getContext(), funcName + "a0"), false, ValueRange{}, ValueRange{arguments[1]}); - rewriter.create( - loc, TypeRange{}, + quake::ApplyOp::create( + rewriter, loc, TypeRange{}, SymbolRefAttr::get(rewriter.getContext(), funcName + "a1"), false, ValueRange{}, ValueRange{arguments[0]}); auto globalPhase = 2.0 * std::arg(phase); if (isAboveThreshold(globalPhase)) { auto phase = cudaq::opt::factory::createFloatConstant( loc, rewriter, globalPhase, floatTy); - Value negPhase = rewriter.create(loc, phase); - rewriter.create(loc, phase, ValueRange{}, arguments[0]); - rewriter.create(loc, negPhase, ValueRange{}, arguments[0]); + Value negPhase = arith::NegFOp::create(rewriter, loc, phase); + 
quake::R1Op::create(rewriter, loc, phase, ValueRange{}, arguments[0]); + quake::RzOp::create(rewriter, loc, negPhase, ValueRange{}, arguments[0]); } - rewriter.create(loc); + func::ReturnOp::create(rewriter, loc); rewriter.restoreInsertionPoint(insPt); } @@ -499,8 +499,8 @@ class UnitarySynthesisPass RewritePatternSet patterns(ctx); patterns.insert(ctx); LLVM_DEBUG(llvm::dbgs() << "Before unitary synthesis: " << func << '\n'); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) + if (failed( + applyPatternsGreedily(func.getOperation(), std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After unitary synthesis: " << func << '\n'); } diff --git a/lib/Optimizer/Transforms/VariableCoalesce.cpp b/lib/Optimizer/Transforms/VariableCoalesce.cpp index b20c5047e35..74d12ba01dd 100644 --- a/lib/Optimizer/Transforms/VariableCoalesce.cpp +++ b/lib/Optimizer/Transforms/VariableCoalesce.cpp @@ -242,7 +242,7 @@ class VariableCoalescePass } auto loc = o->getLoc(); auto ty = cast(o).getElementType(); - auto newVar = rewriter.create(loc, ty); + auto newVar = cudaq::cc::AllocaOp::create(rewriter, loc, ty); analysis.addBinding(o, newVar); } } @@ -250,7 +250,7 @@ class VariableCoalescePass // Step 2: Replace old variables with new ones. RewritePatternSet patterns(ctx); patterns.insert(ctx, analysis); - if (failed(applyPatternsAndFoldGreedily(func, std::move(patterns)))) + if (failed(applyPatternsGreedily(func, std::move(patterns)))) signalPassFailure(); LLVM_DEBUG(llvm::dbgs() << "After variable coalescing:\n" << func << "\n\n"); diff --git a/lib/Optimizer/Transforms/WiresToWiresets.cpp b/lib/Optimizer/Transforms/WiresToWiresets.cpp index cc674b9cbfb..392c4005559 100644 --- a/lib/Optimizer/Transforms/WiresToWiresets.cpp +++ b/lib/Optimizer/Transforms/WiresToWiresets.cpp @@ -6,11 +6,9 @@ * the terms of the Apache License 2.0 which accompanies this distribution. 
* ******************************************************************************/ +#include "PassDetails.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" -#include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Characteristics.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Threading.h" @@ -18,17 +16,14 @@ #include "mlir/Rewrite/FrozenRewritePatternSet.h" #include "mlir/Transforms/DialectConversion.h" -using namespace mlir; - -//===----------------------------------------------------------------------===// -// Generated logic -//===----------------------------------------------------------------------===// namespace cudaq::opt { #define GEN_PASS_DEF_ASSIGNWIREINDICES #define GEN_PASS_DEF_ADDWIRESET #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt +using namespace mlir; + namespace { class NullWirePat : public OpRewritePattern { public: @@ -111,9 +106,9 @@ struct AddWiresetPass void runOnOperation() override { ModuleOp mod = getOperation(); OpBuilder builder(mod.getBodyRegion()); - auto wireSetOp = builder.create( - builder.getUnknownLoc(), cudaq::opt::topologyAgnosticWiresetName, - INT_MAX, ElementsAttr{}); + auto wireSetOp = quake::WireSetOp::create( + builder, builder.getUnknownLoc(), + cudaq::opt::topologyAgnosticWiresetName, INT_MAX, ElementsAttr{}); wireSetOp.setPrivate(); } }; diff --git a/lib/Optimizer/Transforms/WriteAfterWriteElimination.cpp b/lib/Optimizer/Transforms/WriteAfterWriteElimination.cpp index e377d771427..67484bc9f2c 100644 --- a/lib/Optimizer/Transforms/WriteAfterWriteElimination.cpp +++ b/lib/Optimizer/Transforms/WriteAfterWriteElimination.cpp @@ -8,10 +8,7 @@ #include "PassDetails.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include 
"cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" diff --git a/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc b/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc index 867e93c743c..c971ee3f4f6 100644 --- a/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc +++ b/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc @@ -1,5 +1,5 @@ /****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * * All rights reserved. * * * * This source code and the accompanying materials are made available under * @@ -94,8 +94,8 @@ private: if (auto store = dyn_cast(&op)) { auto ptr = store.getPtrvalue().getDefiningOp(); if (isStoreToStack(store)) { - auto &[b, ptrToStores] = blockInfo.FindAndConstruct(block); - auto &[p, stores] = ptrToStores.FindAndConstruct(ptr); + auto ptrToStores = blockInfo[block]; + auto stores = ptrToStores[ptr]; stores.push_back(&op); } } @@ -113,7 +113,7 @@ private: /// cc.store %c0_i64, %3 : !cc.ptr /// ``` static bool isStoreToStack(cudaq::cc::StoreOp store) { - auto ptrOp = store.getPtrvalue(); + Value ptrOp = store.getPtrvalue(); if (auto cast = ptrOp.getDefiningOp()) ptrOp = cast.getOperand(); diff --git a/lib/Verifier/NVQIRCalls.cpp b/lib/Verifier/NVQIRCalls.cpp index 4d8b6fd5ecc..7c473885616 100644 --- a/lib/Verifier/NVQIRCalls.cpp +++ b/lib/Verifier/NVQIRCalls.cpp @@ -39,7 +39,7 @@ constexpr const char *libcFuncs[] = {"malloc", "free", "memcpy", "memset"}; static bool isVerifiedFunction(StringRef name, const SmallVector &goldenFuncs) { auto prefixCheck = [&](const char *prefix) { - return name.startswith(prefix); + return name.starts_with(prefix); }; // Check if name has 
an accepted QIR or LLVM intrinsic prefix. diff --git a/lib/Verifier/QIRLLVMIRDialect.cpp b/lib/Verifier/QIRLLVMIRDialect.cpp index ba6df68db4c..92812236d1d 100644 --- a/lib/Verifier/QIRLLVMIRDialect.cpp +++ b/lib/Verifier/QIRLLVMIRDialect.cpp @@ -74,7 +74,7 @@ LogicalResult cudaq::verifier::checkQIRLLVMIRDialect(ModuleOp module, func && func->hasAttr(cudaq::kernelAttrName)) funcs.push_back(func); - const bool isBaseProfile = profile.startswith("qir-base"); + const bool isBaseProfile = profile.starts_with("qir-base"); auto *ctx = module.getContext(); for (auto func : funcs) { auto walkResult = func.walk([&](Operation *op) { @@ -87,19 +87,28 @@ LogicalResult cudaq::verifier::checkQIRLLVMIRDialect(ModuleOp module, if (!funcNameAttr) return WalkResult::advance(); auto funcName = funcNameAttr.getValue(); - if (isBaseProfile && (!funcName.startswith("__quantum_") || - funcName.equals(cudaq::opt::QIRCustomOp))) { + if (isBaseProfile && (!funcName.starts_with("__quantum_") || + funcName == cudaq::opt::QIRCustomOp)) { call.emitOpError("unexpected call in QIR base profile"); return WalkResult::interrupt(); } // Check that qubits are unique values. const std::size_t numOpnds = call.getNumOperands(); - auto qubitTy = cudaq::opt::getQubitType(ctx); - if (numOpnds > 0) - for (std::size_t i = 0; i < numOpnds - 1; ++i) + auto qubitTy = cudaq::cg::getQubitType(ctx); + // Determine how many leading operands are qubit pointers. With + // opaque pointers, Qubit* and Result* are both !llvm.ptr so we + // cannot distinguish them by type. For measurement functions + // like mz__body(Qubit*, Result*), only the first operand is a + // qubit; the second is a Result. Limit the uniqueness check to + // qubit operand indices only. 
+ std::size_t numQubitOpnds = numOpnds; + if (funcName == cudaq::opt::QIRMeasureBody) + numQubitOpnds = 1; + if (numQubitOpnds > 1) + for (std::size_t i = 0; i < numQubitOpnds - 1; ++i) if (call.getOperand(i).getType() == qubitTy) - for (std::size_t j = i + 1; j < numOpnds; ++j) + for (std::size_t j = i + 1; j < numQubitOpnds; ++j) if (call.getOperand(j).getType() == qubitTy) { auto i1 = call.getOperand(i).getDefiningOp(); diff --git a/lib/Verifier/QIRSpec.cpp b/lib/Verifier/QIRSpec.cpp index 8d028b25516..4c016bdaaa2 100644 --- a/lib/Verifier/QIRSpec.cpp +++ b/lib/Verifier/QIRSpec.cpp @@ -11,6 +11,7 @@ #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Target/LLVMIR/Export.h" diff --git a/pyproject.toml.cu12 b/pyproject.toml.cu12 index 96cd0a73ace..b65219d4ebd 100644 --- a/pyproject.toml.cu12 +++ b/pyproject.toml.cu12 @@ -24,7 +24,7 @@ dependencies = [ 'cudensitymat-cu12 ~= 0.5.1', 'numpy >= 1.24', 'scipy >= 1.10.1', - 'requests >= 2.32.4', + 'requests >= 2.32.3', 'nvidia-cublas-cu12 ~= 12.0', 'nvidia-curand-cu12 ~= 10.3', 'nvidia-cusparse-cu12 ~= 12.5', @@ -62,7 +62,7 @@ visualization = [ "qutip>5" , "matplotlib>=3.5" ] integrators = [ "torchdiffeq" ] [build-system] -requires = ["scikit-build-core==0.11.6", "cmake>=3.27,<3.29", "numpy>=1.24", "pytest==9.0.3"] +requires = ["scikit-build-core==0.11.6", "cmake>=3.27,<3.29", "numpy>=1.24", "pytest==9.0.3", "nanobind>=2.9.0"] build-backend = "scikit_build_core.build" [tool.scikit-build] @@ -87,5 +87,16 @@ if.platform-machine = "x86_64" inherit.cmake.args = "append" cmake.args = ["-DCUDAQ_ENABLE_PASQAL_QRMI_CONNECTOR=ON"] +# Linux: use LLD as the linker +[[tool.scikit-build.overrides]] +if.platform-system = "linux" +inherit.cmake.args = "append" +cmake.args = [ + "-DLLVM_USE_LINKER=lld", + "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld -B/usr/local/llvm/bin", + 
"-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld -B/usr/local/llvm/bin", + "-DCMAKE_MODULE_LINKER_FLAGS=-fuse-ld=lld -B/usr/local/llvm/bin", +] + [tool.setuptools_scm] write_to = "_version.py" diff --git a/pyproject.toml.cu13 b/pyproject.toml.cu13 index fc226812534..c811fbfe5cc 100644 --- a/pyproject.toml.cu13 +++ b/pyproject.toml.cu13 @@ -21,7 +21,7 @@ dependencies = [ 'astpretty ~= 3.0', 'numpy >= 1.24', 'scipy >= 1.10.1', - 'requests >= 2.32.4', + 'requests >= 2.32.3', # CUDA dependencies - excluded on macOS (CPU-only support) 'custatevec-cu13 ~= 1.13.1; sys_platform != "darwin"', 'cutensornet-cu13 ~= 2.12.1; sys_platform != "darwin"', @@ -64,7 +64,7 @@ visualization = [ "qutip>5" , "matplotlib>=3.5" ] integrators = [ "torchdiffeq" ] [build-system] -requires = ["scikit-build-core==0.11.6", "cmake>=3.27,<3.29", "numpy>=1.24", "pytest==9.0.3"] +requires = ["scikit-build-core==0.11.6", "cmake>=3.27,<3.29", "numpy>=1.24", "pytest==9.0.3", "nanobind>=2.9.0"] build-backend = "scikit_build_core.build" [tool.scikit-build] @@ -89,6 +89,17 @@ if.platform-machine = "x86_64" inherit.cmake.args = "append" cmake.args = ["-DCUDAQ_ENABLE_PASQAL_QRMI_CONNECTOR=ON"] +# Linux: use LLD as the linker +[[tool.scikit-build.overrides]] +if.platform-system = "linux" +inherit.cmake.args = "append" +cmake.args = [ + "-DLLVM_USE_LINKER=lld", + "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld -B/usr/local/llvm/bin", + "-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld -B/usr/local/llvm/bin", + "-DCMAKE_MODULE_LINKER_FLAGS=-fuse-ld=lld -B/usr/local/llvm/bin", +] + # macOS: Disable symbol stripping. LLVM's JIT relies on local symbols for # internal data structures (eg., PassRegistry). 
On macOS # with it's two-level namespace they are removed by stripping which diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 2eaf4ee0d75..3dd993f587d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -8,10 +8,6 @@ add_subdirectory(utils) -if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") - set(CMAKE_BUILD_TYPE "Debug") -endif() - # [RFC]: # Check how to solve this better than just disable the warning for the whole directory. # If this is better addressed after updating to a newer LLVM version, track as an issue on GitHub. @@ -35,15 +31,22 @@ if (CUDA_FOUND) find_package(CUDAToolkit REQUIRED) endif() +if(LLVM_ENABLE_ASSERTIONS) + set(CUDAQ_ASSERTIONS_ENABLED "1") +else() + set(CUDAQ_ASSERTIONS_ENABLED "") +endif() + set(METADATA_FILE "${CMAKE_BINARY_DIR}/python/cudaq/_metadata.py" ) add_custom_target( CopyPythonFiles ALL COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_BINARY_DIR}/python - COMMAND ${CMAKE_COMMAND} - -DMETADATA_FILE="${METADATA_FILE}" - -DCUDA_VERSION_MAJOR=${CUDAToolkit_VERSION_MAJOR} + COMMAND ${CMAKE_COMMAND} + -DMETADATA_FILE="${METADATA_FILE}" + -DCUDA_VERSION_MAJOR=${CUDAToolkit_VERSION_MAJOR} + -DASSERTIONS_ENABLED=${CUDAQ_ASSERTIONS_ENABLED} -P ${CMAKE_CURRENT_SOURCE_DIR}/metadata.cmake DEPENDS ${PYTHON_SOURCES} BYPRODUCTS "${METADATA_FILE}" diff --git a/python/cudaq/__init__.py b/python/cudaq/__init__.py index afce1fb5832..8c675f9ea24 100644 --- a/python/cudaq/__init__.py +++ b/python/cudaq/__init__.py @@ -127,6 +127,46 @@ def _configure_cuda_library_paths() -> None: print("Could not find a suitable cuQuantum Python package.") pass + +def _patch_mlir_isinstance() -> None: + import builtins + + from .mlir._mlir_libs import _mlir as _mlir_ext + ir = _mlir_ext.ir + value_base = getattr(ir, "Value", None) + py_isinstance = builtins.isinstance + for name in dir(ir): + cls = getattr(ir, name) + if not py_isinstance(cls, type) or "isinstance" in cls.__dict__: + continue + static_typeid = None + 
try: + static_typeid = cls.static_typeid + except Exception: + pass + if static_typeid is not None: + + def _isinstance(other, _tid=static_typeid): + try: + return other.typeid == _tid + except Exception: + return False + elif value_base is not None and cls is not value_base and \ + issubclass(cls, value_base): + + def _isinstance(other, _cls=cls, _isinst=py_isinstance): + try: + return _isinst(other.maybe_downcast(), _cls) + except Exception: + return False + else: + continue + setattr(cls, "isinstance", staticmethod(_isinstance)) + + +_patch_mlir_isinstance() +del _patch_mlir_isinstance + # ============================================================================ # # Module Imports # ============================================================================ # diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py index 2f0aac710df..e7f58617f0f 100644 --- a/python/cudaq/kernel/ast_bridge.py +++ b/python/cudaq/kernel/ast_bridge.py @@ -2794,12 +2794,12 @@ def isExactCudaqDbgAstCall(func_node: ast.AST) -> bool: totalSize = arith.SubIOp(endVal, startVal).result if isDecrementing: - roundingOffset = arith.AddIOp(stepVal, one) + roundingOffset = arith.AddIOp(stepVal, one).result else: - roundingOffset = arith.SubIOp(stepVal, one) - totalSize = arith.AddIOp(totalSize, roundingOffset) + roundingOffset = arith.SubIOp(stepVal, one).result + totalSize = arith.AddIOp(totalSize, roundingOffset).result totalSize = arith.MaxSIOp( - zero, + zero.result, arith.DivSIOp(totalSize, stepVal).result).result # Create an array of i64 of the total size @@ -2815,7 +2815,7 @@ def isExactCudaqDbgAstCall(func_node: ast.AST) -> bool: # but we also need to keep track of a counter counter = cc.AllocaOp(cc.PointerType.get(iTy), TypeAttr.get(iTy)).result - cc.StoreOp(zero, counter) + cc.StoreOp(zero.result, counter) def bodyBuilder(iterVar): loadedCounter = cc.LoadOp(counter).result @@ -2824,7 +2824,8 @@ def bodyBuilder(iterVar): 
DenseI32ArrayAttr.get([kDynamicPtrIndex], context=self.ctx)) cc.StoreOp(iterVar, eleAddr) - incrementedCounter = arith.AddIOp(loadedCounter, one).result + incrementedCounter = arith.AddIOp(loadedCounter, + one.result).result cc.StoreOp(incrementedCounter, counter) self.createMonotonicForLoop(bodyBuilder, @@ -3610,19 +3611,25 @@ def check_vector_init(): cudaq_module = importlib.import_module('cudaq') channel_class = getattr(cudaq_module, node.args[0].attr) - numParams = channel_class.num_parameters + numParams = (channel_class.num_parameters + if hasattr(channel_class, + 'num_parameters') else + channel_class.get_num_parameters()) key = self.getConstantInt(hash(channel_class)) elif isinstance(node.args[0], ast.Name): arg = recover_value_of_or_none( node.args[0].id, self.defFrame) if (arg and isinstance(arg, type) and issubclass( arg, cudaq_runtime.KrausChannel)): - if not hasattr(arg, 'num_parameters'): + if (not hasattr(arg, 'num_parameters') and + not hasattr(arg, 'get_num_parameters')): self.emitFatalError( 'apply_noise kraus channels must have ' '`num_parameters` constant class ' 'attribute specified.') - numParams = arg.num_parameters + numParams = (arg.num_parameters if hasattr( + arg, 'num_parameters') else + arg.get_num_parameters()) key = self.getConstantInt(hash(arg)) if key is None: self.emitFatalError( @@ -4823,10 +4830,10 @@ def compare_equality(item1, item2): if ComplexType.isinstance(item1.type): reComp = arith.CmpFOp(fCondPred, complex.ReOp(item1).result, - complex.ReOp(item2).result) + complex.ReOp(item2).result).result imComp = arith.CmpFOp(fCondPred, complex.ImOp(item1).result, - complex.ImOp(item2).result) + complex.ImOp(item2).result).result return arith.AndIOp(reComp, imComp).result elif IntegerType.isinstance(item1.type): return arith.CmpIOp(iCondPred, item1, item2).result @@ -5515,8 +5522,13 @@ def compile_to_mlir(uniqueId, astModule, signature: KernelSignature, defFrame, if verbose: print(bridge.module) # Clear the live operations cache. 
This avoids python crashing with - # stale references being cached. - bridge.module.context._clear_live_operations() + # stale references being cached. (MLIR 22+ may expose this as + # clear_live_operations instead of _clear_live_operations.) + ctx = bridge.module.context + clear_fn = getattr(ctx, '_clear_live_operations', None) or getattr( + ctx, 'clear_live_operations', None) + if clear_fn is not None: + clear_fn() # The only MLIR code object wrapped & tracked ought to be `newMod` now. cudaq_runtime.set_data_layout(bridge.module) return bridge.module diff --git a/python/cudaq/kernel/kernel_builder.py b/python/cudaq/kernel/kernel_builder.py index 68627126007..64608e50e50 100644 --- a/python/cudaq/kernel/kernel_builder.py +++ b/python/cudaq/kernel/kernel_builder.py @@ -1553,12 +1553,24 @@ def process_channel_param(self, param): else: emitFatalError("Noise channel parameter must be float") + @staticmethod + def _get_num_parameters(noise_channel): + """Return the `num_parameters` for a noise channel class, + supporting both the attribute (custom channels) and the + method (nanobind-bound built-in channels).""" + if hasattr(noise_channel, 'num_parameters'): + return noise_channel.num_parameters + if hasattr(noise_channel, 'get_num_parameters'): + return noise_channel.get_num_parameters() + return None + @staticmethod def _validate_noise_channel_probability_params(noise_channel, param_values): """ Raise `RuntimeError` if any `param` is a constant float outside [0, 1]. 
""" - if not hasattr(noise_channel, 'num_parameters'): + if not (hasattr(noise_channel, 'num_parameters') or + hasattr(noise_channel, 'get_num_parameters')): return for p in param_values: if isinstance(p, (int, float)): @@ -1578,17 +1590,19 @@ def apply_noise(self, noise_channel, *args): self.appliedNoiseChannels.append(noise_channel) if not issubclass(noise_channel, cudaq_runtime.KrausChannel): - if not hasattr(noise_channel, 'num_parameters'): + if not (hasattr(noise_channel, 'num_parameters') or + hasattr(noise_channel, 'get_num_parameters')): emitFatalError( 'apply_noise kraus channels must have `num_parameters` ' 'constant class attribute specified.') + n_params = self._get_num_parameters(noise_channel) # We needs to have noise channel parameters + qubit arguments if isinstance(args[0], list): - if len(args[0]) != noise_channel.num_parameters: + if len(args[0]) != n_params: emitFatalError(f"Invalid number of arguments passed to " f"apply_noise for channel `{noise_channel}`") - elif len(args) <= noise_channel.num_parameters: + elif len(args) <= n_params: emitFatalError(f"Invalid number of arguments passed to " f"apply_noise for channel `{noise_channel}`") @@ -1612,11 +1626,12 @@ def apply_noise(self, noise_channel, *args): emitFatalError("Invalid qubit operand type") target_qubits.append(p.mlirValue) else: - param_values = args[:noise_channel.num_parameters] + n_params = self._get_num_parameters(noise_channel) + param_values = args[:n_params] self._validate_noise_channel_probability_params( noise_channel, param_values) for i, p in enumerate(args): - if i < noise_channel.num_parameters: + if i < n_params: noise_channel_params.append( self.process_channel_param(p)) else: diff --git a/python/cudaq/kernel/kernel_decorator.py b/python/cudaq/kernel/kernel_decorator.py index 98787d1844f..a706760fc4c 100644 --- a/python/cudaq/kernel/kernel_decorator.py +++ b/python/cudaq/kernel/kernel_decorator.py @@ -305,7 +305,7 @@ def merge_kernel(self, otherMod): for op in 
newMod.body: if isinstance(op, func.FuncOp): for attr in op.attributes: - if 'cudaq-entrypoint' == attr.name: + if 'cudaq-entrypoint' == attr: name = op.name.value.removeprefix(nvqppPrefix) break @@ -327,7 +327,7 @@ def merge_quake_source(self, quakeText): for op in newMod.body: if isinstance(op, func.FuncOp): for attr in op.attributes: - if 'cudaq-entrypoint' == attr.name: + if 'cudaq-entrypoint' == attr: name = op.name.value.removeprefix(nvqppPrefix) break diff --git a/python/cudaq/mlir/dialects/CCOps.td b/python/cudaq/mlir/dialects/CCOps.td index db5f1469beb..7822ababa66 100644 --- a/python/cudaq/mlir/dialects/CCOps.td +++ b/python/cudaq/mlir/dialects/CCOps.td @@ -9,7 +9,6 @@ #ifndef PYTHON_BINDINGS_CC_OPS #define PYTHON_BINDINGS_CC_OPS -include "mlir/Bindings/Python/Attributes.td" include "cudaq/Optimizer/Dialect/CC/CCOps.td" #endif diff --git a/python/cudaq/mlir/dialects/QuakeOps.td b/python/cudaq/mlir/dialects/QuakeOps.td index 6552c781014..e7ef1d46ab4 100644 --- a/python/cudaq/mlir/dialects/QuakeOps.td +++ b/python/cudaq/mlir/dialects/QuakeOps.td @@ -9,7 +9,6 @@ #ifndef PYTHON_BINDINGS_QUAKE_OPS #define PYTHON_BINDINGS_QUAKE_OPS -include "mlir/Bindings/Python/Attributes.td" include "cudaq/Optimizer/Dialect/Quake/QuakeOps.td" #endif diff --git a/python/cudaq/runtime/sample.py b/python/cudaq/runtime/sample.py index 21975599e43..4957336721d 100644 --- a/python/cudaq/runtime/sample.py +++ b/python/cudaq/runtime/sample.py @@ -92,8 +92,10 @@ def _detail_check_conditionals_on_measure(kernel): # Only check for kernels that can be compiled, not library-mode kernels (e.g., photonics) if kernel.supports_compilation(): for operation in kernel.qkeModule.body.operations: - if (hasattr(operation, 'name') and nvqppPrefix + kernel.uniqName - == operation.name.value and + op_name = getattr(operation.name, + 'value', operation.name) if hasattr( + operation, 'name') else None + if (op_name == nvqppPrefix + kernel.uniqName and 'qubitMeasurementFeedback' in operation.attributes): 
has_conditionals_on_measure_result = True elif isinstance(kernel, PyKernel) and kernel.conditionalOnMeasure: diff --git a/python/extension/CMakeLists.txt b/python/extension/CMakeLists.txt index da035cc75cf..5873bae9597 100644 --- a/python/extension/CMakeLists.txt +++ b/python/extension/CMakeLists.txt @@ -6,53 +6,23 @@ # the terms of the Apache License 2.0 which accompanies this distribution. # # ============================================================================ # -if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") - set(CMAKE_BUILD_TYPE "Debug") -endif() - include(HandleLLVMOptions) include(AddMLIRPython) -function(add_mlir_python_extension libname extname) - cmake_parse_arguments(ARG - "" - "INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY" - "SOURCES;LINK_LIBS" - ${ARGN}) - - # Use nanobind for CUDA-Q's own extension (_quakeDialects) and pybind11 - # for upstream MLIR extensions (AsyncPasses, RegisterEverything, etc.). - if(libname MATCHES "_quakeDialects") - nanobind_add_module(${libname} NB_STATIC ${ARG_SOURCES}) - target_compile_options(${libname} PRIVATE -frtti -fexceptions -Wno-cast-qual) - else() - pybind11_add_module(${libname} MODULE ${ARG_SOURCES}) - target_compile_options(${libname} PRIVATE -frtti -fexceptions) - endif() - - set_target_properties(${libname} PROPERTIES - LIBRARY_OUTPUT_DIRECTORY ${ARG_OUTPUT_DIRECTORY} - OUTPUT_NAME "${extname}" - NO_SONAME ON - ) - - target_link_libraries(${libname} PRIVATE ${ARG_LINK_LIBS}) - target_link_options(${libname} PRIVATE - $<$:LINKER:--exclude-libs,ALL> - ) - - if(ARG_INSTALL_DIR) - install(TARGETS ${libname} - COMPONENT ${ARG_INSTALL_COMPONENT} - LIBRARY DESTINATION "${ARG_INSTALL_DIR}" - RUNTIME DESTINATION "${ARG_INSTALL_DIR}" - ) - endif() -endfunction() +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-Wdeprecated-literal-operator" + CUDAQ_HAS_WDEPRECATED_LITERAL_OPERATOR) +if(NOT CUDAQ_HAS_WDEPRECATED_LITERAL_OPERATOR) + add_compile_options(-Wno-unknown-warning-option) +endif() # Specifies that all 
MLIR packages are co-located under the cudaq # top level package (the API has been embedded in a relocatable way). add_compile_definitions("MLIR_PYTHON_PACKAGE_PREFIX=cudaq.mlir.") +# Mark QPU sources compiled into the Python extension so they use the +# cross-DSO registry hook (cudaq_add_qpu_node) instead of the local +# CUDAQ_REGISTER_TYPE which would register into the wrong DSO. +add_compile_definitions("CUDAQ_PYTHON_EXTENSION") ################################################################################ # Sources @@ -64,14 +34,50 @@ declare_mlir_dialect_python_bindings( ADD_TO_PARENT CUDAQuantumPythonSources ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../cudaq/mlir" TD_FILE dialects/QuakeOps.td + SOURCES + dialects/quake.py DIALECT_NAME quake) declare_mlir_dialect_python_bindings( ADD_TO_PARENT CUDAQuantumPythonSources ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../cudaq/mlir" TD_FILE dialects/CCOps.td + SOURCES + dialects/cc.py DIALECT_NAME cc) +if(APPLE) + set(_quakeDialects_mlir_runtime_sources "") + set(_quakeDialects_extra_link_libs + cudaq-mlir-runtime + MLIRPass + CUDAQTargetConfigUtil + cudaq-python-interop + cudaq-platform-default + ) + set(_quakeDialects_pipeline_carrying_link_libs "") +else() + set(_quakeDialects_mlir_runtime_sources + ../../runtime/internal/compiler/ArgumentConversion.cpp + ../../runtime/internal/compiler/CompiledModuleHelper.cpp + ../../runtime/internal/compiler/LayoutInfo.cpp + ../../runtime/internal/compiler/RuntimeMLIR.cpp + ../../runtime/internal/compiler/JIT.cpp + ../../runtime/internal/compiler/Compiler.cpp + ../../runtime/internal/compiler/TracePassInstrumentation.cpp + ) + set(_quakeDialects_extra_link_libs "") + set(_quakeDialects_pipeline_carrying_link_libs + OptCodeGen + OptTransforms + MLIRPass + CUDAQTargetConfigUtil + cudaq-python-interop + cudaq-platform-default + cudaq-qir-verifier + ) +endif() + declare_mlir_python_extension(CUDAQuantumPythonSources.Extension MODULE_NAME _quakeDialects ADD_TO_PARENT 
CUDAQuantumPythonSources @@ -130,27 +136,15 @@ declare_mlir_python_extension(CUDAQuantumPythonSources.Extension ../../runtime/cudaq/platform/fermioniq/FermioniqQPU.cpp ../../runtime/cudaq/platform/default/rest/RemoteRESTQPU.cpp ../../runtime/cudaq/platform/default/python/QPU.cpp - ../../runtime/internal/compiler/ArgumentConversion.cpp - ../../runtime/internal/compiler/CompiledModuleHelper.cpp - ../../runtime/internal/compiler/LayoutInfo.cpp - ../../runtime/internal/compiler/RuntimeMLIR.cpp + ${_quakeDialects_mlir_runtime_sources} ../../runtime/internal/compiler/RuntimePyMLIR.cpp - ../../runtime/internal/compiler/JIT.cpp - ../../runtime/internal/compiler/Compiler.cpp - ../../runtime/internal/compiler/TracePassInstrumentation.cpp EMBED_CAPI_LINK_LIBS CUDAQuantumMLIRCAPI MLIRCAPIExecutionEngine PRIVATE_LINK_LIBS - OptCodeGen - OptTransforms - MLIRPass - CUDAQTargetConfigUtil - cudaq-python-interop - cudaq-platform-default - cudaq-qir-verifier - cudaq-mlir-runtime-headers + ${_quakeDialects_pipeline_carrying_link_libs} + ${_quakeDialects_extra_link_libs} ) # MLIR/LLVM is built without RTTI (LLVM_ENABLE_RTTI=OFF). This file subclasses @@ -166,6 +160,7 @@ target_include_directories(CUDAQuantumPythonSources.Extension INTERFACE ${CMAKE_SOURCE_DIR}/python ${CMAKE_SOURCE_DIR}/python/utils ${CMAKE_SOURCE_DIR}/runtime + ${CMAKE_SOURCE_DIR}/runtime/internal/compiler/include ) target_link_libraries(CUDAQuantumPythonSources.Extension INTERFACE cudaq @@ -174,8 +169,10 @@ target_link_libraries(CUDAQuantumPythonSources.Extension INTERFACE cudaq-em-default cudaq-em-photonics fmt::fmt-header-only - unzip_util ) +if (CUDAQ_ENABLE_REST) + target_link_libraries(CUDAQuantumPythonSources.Extension INTERFACE unzip_util) +endif() ################################################################################ # Common CAPI @@ -192,12 +189,29 @@ add_mlir_python_common_capi_library(CUDAQuantumPythonCAPI # available. 
MLIRPythonExtension.RegisterEverything MLIRPythonSources.Core + # Include full MLIRPythonSources so dialect extensions' EMBED_CAPI_LINK_LIBS + # (e.g. obj.MLIRCAPILLVM for the LLVM dialect) are embedded into the common + # CAPI lib. Otherwise _mlirDialectsLLVM.so fails with undefined symbol + # mlirTypeIsALLVMStructType at runtime. + MLIRPythonSources ) +if(APPLE) + target_link_options(CUDAQuantumPythonCAPI PRIVATE + "LINKER:-flat_namespace" + "LINKER:-undefined,dynamic_lookup") +endif() + ################################################################################ # Instantiation of Python module ################################################################################ +# This variable is unused in cudaq but if it is not set, we hit a bug in +# add_mlir_python_modules whereby it is defined twice on the compilation line: +# -DMLIR_BINDINGS_PYTHON_NB_DOMAIN "" -DMLIR_BINDINGS_PYTHON_NB_DOMAIN mlir +# which results in a compilation error. +set(MLIR_BINDINGS_PYTHON_NB_DOMAIN "cudaq") + add_mlir_python_modules(CUDAQuantumPythonModules ROOT_PREFIX "${MLIR_BINARY_DIR}/python/cudaq/mlir" INSTALL_PREFIX "cudaq/mlir" @@ -211,11 +225,94 @@ add_mlir_python_modules(CUDAQuantumPythonModules CUDAQuantumPythonCAPI ) -if(TARGET nanobind-static) - target_compile_options(nanobind-static PRIVATE -Wno-cast-qual -Wno-covered-switch-default) +# Suppress warnings-as-errors for upstream MLIR Python extension sources +# that have minor GCC warnings (address-of-function, parentheses) in LLVM 22. 
+foreach(_cudaq_py_ext_target + CUDAQuantumPythonModules.extension._mlir.dso + CUDAQuantumPythonModules.extension.MLIRPythonSupport-cudaq.so) + if(TARGET ${_cudaq_py_ext_target}) + target_compile_options(${_cudaq_py_ext_target} PRIVATE + -Wno-error=address -Wno-error=parentheses) + endif() +endforeach() + +# Upstream MLIR's add_mlir_python_extension sets `-Wl,--exclude-libs,ALL` on +# every extension, which hides the symbols pulled in from the static MLIR +# archives from the extension's dynamic symbol table. For upstream extensions +# that only use CAPI functions this is fine, but CUDA-Q's _quakeDialects.so +# calls MLIR C++ APIs directly (e.g. StringAttr::get in py_register_dialects +# and CUDAQuantumExtension). Those calls reference template statics like +# `mlir::detail::TypeIDResolver::id` which are GNU UNIQUE symbols with +# default visibility. When `--exclude-libs,ALL` hides them, each DSO ends up +# with its own private copy, and the TypeID used inside _quakeDialects.so no +# longer matches CAPI's — tripping "storage uniquer isn't initialized" and +# "different dialects for the same namespace" errors at runtime. +# +# Strip that option so the UNIQUE statics stay in the dynamic symbol table +# and the runtime linker unifies them with libCUDAQuantumPythonCAPI.so's +# copy at load time. +if(TARGET CUDAQuantumPythonModules.extension._quakeDialects.dso) + # 1) Strip --exclude-libs,ALL so the MLIR template statics (e.g. + # mlir::detail::TypeIDResolver::id) that come in via the MLIR static + # archives stay in the dynamic symbol table. These are STB_GLOBAL + # (default visibility) COMDAT symbols; with --exclude-libs,ALL they are + # demoted to local and each DSO ends up with its own private copy. 
+ get_target_property(_qd_link_options + CUDAQuantumPythonModules.extension._quakeDialects.dso LINK_OPTIONS) + if(_qd_link_options) + list(REMOVE_ITEM _qd_link_options + "$<$:LINKER:--exclude-libs,ALL>" + "LINKER:--exclude-libs,ALL" + "LINKER:-twolevel_namespace") + set_target_properties(CUDAQuantumPythonModules.extension._quakeDialects.dso + PROPERTIES LINK_OPTIONS "${_qd_link_options}") + endif() + + # 2) Prepend libCUDAQuantumPythonCAPI.so to the link line so ld's archive + # extraction finds MLIR symbols in the (shared) CAPI before scanning the + # static archives. When CAPI already defines `mlir::StringAttr::get`, + # `mlir::detail::TypeIDResolver::id`, etc., the matching .o files in + # libMLIRIR.a are not pulled in, so _quakeDialects.so has no private + # copies and its references resolve to CAPI at runtime — keeping the + # TypeID addresses consistent with the ones CAPI used when constructing + # the MLIRContext. + target_link_options(CUDAQuantumPythonModules.extension._quakeDialects.dso + BEFORE PRIVATE + "$") + + if(APPLE) + target_link_options(CUDAQuantumPythonModules.extension._quakeDialects.dso + PRIVATE + "LINKER:-flat_namespace" + "LINKER:-undefined,dynamic_lookup") + endif() endif() -## The Python bindings module for Quake dialect depends on CUDAQ libraries +if(TARGET cudaq-mlir-runtime AND TARGET CUDAQuantumPythonCAPI) + get_target_property(_mr_link_options cudaq-mlir-runtime LINK_OPTIONS) + if(_mr_link_options) + list(REMOVE_ITEM _mr_link_options + "$<$:LINKER:--exclude-libs,ALL>" + "LINKER:--exclude-libs,ALL" + "LINKER:-twolevel_namespace") + set_target_properties(cudaq-mlir-runtime + PROPERTIES LINK_OPTIONS "${_mr_link_options}") + endif() + target_link_options(cudaq-mlir-runtime BEFORE PRIVATE + "$") + if(APPLE) + target_link_options(cudaq-mlir-runtime PRIVATE + "LINKER:-undefined,dynamic_lookup") + else() + target_link_libraries(cudaq-mlir-runtime INTERFACE + $) + endif() + set_property(TARGET cudaq-mlir-runtime APPEND PROPERTY + BUILD_RPATH 
"$") + add_dependencies(cudaq-mlir-runtime CUDAQuantumPythonCAPI) +endif() + +## The Python bindings module for Quake dialect depends on CUDAQ libraries ## which it can't locate since they are in "../../lib" and the 'rpath' is set ## to '$ORIGIN' by default. ## macOS uses @loader_path instead of $ORIGIN for RPATH. @@ -225,6 +322,15 @@ else() set(_origin_prefix "$ORIGIN") endif() +## Retain all linked libraries (e.g. libcudaq) so that static initializers +## (ModuleLauncher registry and PythonLauncher registration) run and resolve +## in the same process. Without --no-as-needed the linker may drop libcudaq +## and the launcher is never registered. +if(CUDAQ_FORCE_LINK_FLAG) + target_link_options(CUDAQuantumPythonCAPI PRIVATE + ${CUDAQ_FORCE_LINK_FLAG}) +endif() + if (NOT SKBUILD) list(APPEND CMAKE_INSTALL_RPATH "${_origin_prefix}/../../lib" "${_origin_prefix}/../../lib/plugins") set_property(TARGET CUDAQuantumPythonModules.extension._quakeDialects.dso diff --git a/python/extension/CUDAQuantumExtension.cpp b/python/extension/CUDAQuantumExtension.cpp index a1a581b680d..8085ab79bf8 100644 --- a/python/extension/CUDAQuantumExtension.cpp +++ b/python/extension/CUDAQuantumExtension.cpp @@ -48,8 +48,8 @@ #include "runtime/interop/PythonCppInteropDecls.h" #include "runtime/mlir/py_register_dialects.h" #include "utils/LinkedLibraryHolder.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include "mlir/CAPI/Pass.h" #include "mlir/Parser/Parser.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -65,7 +65,12 @@ using namespace cudaq; static std::unique_ptr holder; +extern "C" void cudaq_ensure_default_launcher_linked(void); + NB_MODULE(_quakeDialects, m) { + // Ensure the TU that registers PythonLauncher ("default") is linked so + // kernel launches work without an explicit set_target(). 
+ cudaq_ensure_default_launcher_linked(); holder = std::make_unique(); bindRegisterDialects(m); diff --git a/python/metadata.cmake b/python/metadata.cmake index a8fb2a8f4cd..8a8e67775d1 100644 --- a/python/metadata.cmake +++ b/python/metadata.cmake @@ -20,3 +20,9 @@ if(CUDA_VERSION_MAJOR) else() file(WRITE ${METADATA_FILE} "cuda_major=None") endif() + +if(ASSERTIONS_ENABLED) + file(APPEND ${METADATA_FILE} "\nassertions_enabled=True") +else() + file(APPEND ${METADATA_FILE} "\nassertions_enabled=False") +endif() diff --git a/python/runtime/common/py_AnalogHamiltonian.cpp b/python/runtime/common/py_AnalogHamiltonian.cpp index ec182338e03..696687994e8 100644 --- a/python/runtime/common/py_AnalogHamiltonian.cpp +++ b/python/runtime/common/py_AnalogHamiltonian.cpp @@ -9,6 +9,7 @@ #include "py_AnalogHamiltonian.h" #include "common/AnalogHamiltonian.h" #include "common/JsonConvert.h" +#include #include #include #include diff --git a/python/runtime/common/py_ExecutionContext.cpp b/python/runtime/common/py_ExecutionContext.cpp index 132462462de..b21101d7f7b 100644 --- a/python/runtime/common/py_ExecutionContext.cpp +++ b/python/runtime/common/py_ExecutionContext.cpp @@ -129,14 +129,19 @@ void bindExecutionContext(nanobind::module_ &mod) { nanobind::arg("qpuId") = 0); mod.def("getQirOutputLog", []() { return nvqir::getQirOutputLog(); }); mod.def("clearQirOutputLog", []() { nvqir::clearQirOutputLog(); }); - mod.def("decodeQirOutputLog", - [](const std::string &outputLog, nanobind::bytearray decodedResults) { - cudaq::RecordLogParser parser; - parser.parse(outputLog); - auto *origBuffer = parser.getBufferPtr(); - const std::size_t bufferSize = parser.getBufferSize(); - std::memcpy(decodedResults.data(), origBuffer, bufferSize); - }); + mod.def("decodeQirOutputLog", [](const std::string &outputLog, + nanobind::object decodedResults) { + cudaq::RecordLogParser parser; + parser.parse(outputLog); + Py_buffer view; + if (PyObject_GetBuffer(decodedResults.ptr(), &view, 
PyBUF_WRITABLE) != 0) + throw nanobind::python_error(); + // Get the buffer and length of buffer (in bytes) from the parser. + auto *origBuffer = parser.getBufferPtr(); + const std::size_t bufferSize = parser.getBufferSize(); + std::memcpy(view.buf, origBuffer, bufferSize); + PyBuffer_Release(&view); + }); nanobind::class_( mod, "reuse_compiler_artifacts", diff --git a/python/runtime/common/py_SampleResult.cpp b/python/runtime/common/py_SampleResult.cpp index 47b65d5226e..df1785d0eb5 100644 --- a/python/runtime/common/py_SampleResult.cpp +++ b/python/runtime/common/py_SampleResult.cpp @@ -83,11 +83,11 @@ terminal measurements. .def( "__iter__", [](sample_result &self) { - return nanobind::make_key_iterator(nanobind::type(), - "key_iterator", self.begin(), - self.end()); + nanobind::list keys; + for (auto it = self.begin(); it != self.end(); ++it) + keys.append(nanobind::cast(it->first)); + return keys.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Iterate through the :class:`SampleResult` dictionary.\n") .def("expectation", &sample_result::expectation, nanobind::arg("register_name") = GlobalRegisterName, @@ -182,21 +182,21 @@ qubits (`marginal_indices`). 
.def( "items", [](sample_result &self) { - return nanobind::make_iterator(nanobind::type(), - "item_iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::make_tuple(it->first, it->second)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Return the key/value pairs in this :class:`SampleResult` " "dictionary.\n") .def( "values", [](sample_result &self) { - return nanobind::make_value_iterator( - nanobind::type(), "value_iterator", self.begin(), - self.end()); + nanobind::list values; + for (auto it = self.begin(); it != self.end(); ++it) + values.append(nanobind::cast(it->second)); + return values.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Return all values (the counts) in this :class:`SampleResult` " "dictionary.\n") .def(nanobind::self += nanobind::self) diff --git a/python/runtime/cudaq/algorithms/py_evolve.cpp b/python/runtime/cudaq/algorithms/py_evolve.cpp index 80e54f3edc7..eac8cebf668 100644 --- a/python/runtime/cudaq/algorithms/py_evolve.cpp +++ b/python/runtime/cudaq/algorithms/py_evolve.cpp @@ -11,8 +11,8 @@ #include "cudaq/algorithms/evolve_internal.h" #include "cudaq/runtime/logger/logger.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include "mlir/CAPI/IR.h" #include #include diff --git a/python/runtime/cudaq/algorithms/py_observe_async.cpp b/python/runtime/cudaq/algorithms/py_observe_async.cpp index 1b134f5731e..faa4b114e78 100644 --- a/python/runtime/cudaq/algorithms/py_observe_async.cpp +++ b/python/runtime/cudaq/algorithms/py_observe_async.cpp @@ -13,14 +13,13 @@ #include "cudaq/Todo.h" #include "cudaq/algorithms/observe.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" +#include 
"mlir/Bindings/Python/NanobindAdaptors.h" #include "mlir/CAPI/IR.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include #include #include -#include #include using namespace cudaq; @@ -131,8 +130,10 @@ pyObservePar(const PyParType &type, const std::string &shortName, printf( "[cudaq::observe warning] distributed observe requested but only 1 " "QPU available. no speedup expected.\n"); + nanobind::gil_scoped_release release; return details::distributeComputations( [&](std::size_t i, const spin_op &op) { + nanobind::gil_scoped_acquire acquire; return pyObserveAsync(shortName, module, op, i, shots, args); }, spin_operator, nQpus); @@ -154,8 +155,10 @@ pyObservePar(const PyParType &type, const std::string &shortName, auto localH = spins[rank]; // Distribute locally, i.e. to the local nodes QPUs + nanobind::gil_scoped_release release; auto localRankResult = details::distributeComputations( [&](std::size_t i, const spin_op &op) { + nanobind::gil_scoped_acquire acquire; return pyObserveAsync(shortName, module, op, i, shots, args); }, localH, nQpus); @@ -170,12 +173,12 @@ pyObservePar(const PyParType &type, const std::string &shortName, /// broadcast. All these variants are handled here. 
static observe_result observe_parallel_impl(const std::string &shortName, MlirModule module, - nanobind::type_object execution, + nanobind::object execution, spin_op &spin_operator, int shots, std::optional noise, nanobind::args arguments) { std::string applicatorKey = - nanobind::cast(execution.attr("__name__")); + std::string(nanobind::str(execution.attr("__name__")).c_str()); auto mod = unwrap(module); if (applicatorKey == "thread") return pyObservePar(PyParType::thread, shortName, mod, spin_operator, shots, @@ -207,5 +210,9 @@ void cudaq::bindObserveAsync(nanobind::module_ &mod) { "Test to see if the kernel is suited for use with observe."); mod.def("observe_parallel_impl", observe_parallel_impl, + nanobind::arg("shortName"), nanobind::arg("module"), + nanobind::arg("execution"), nanobind::arg("spin_operator"), + nanobind::arg("shots"), nanobind::arg("noise").none(), + nanobind::arg("arguments"), "See the python documentation for observe_parallel."); } diff --git a/python/runtime/cudaq/algorithms/py_optimizer.cpp b/python/runtime/cudaq/algorithms/py_optimizer.cpp index 339b33e81ae..39c390e6b28 100644 --- a/python/runtime/cudaq/algorithms/py_optimizer.cpp +++ b/python/runtime/cudaq/algorithms/py_optimizer.cpp @@ -16,6 +16,7 @@ #include "cudaq/algorithms/gradients/central_difference.h" #include "cudaq/algorithms/gradients/forward_difference.h" #include "cudaq/algorithms/gradients/parameter_shift.h" +#include "cudaq/algorithms/optimizer.h" #include "cudaq/algorithms/optimizers/ensmallen/ensmallen.h" #include "cudaq/algorithms/optimizers/nlopt/nlopt.h" #include "py_optimizer.h" @@ -23,12 +24,40 @@ namespace cudaq { -/// @brief optimization_result is a typedef for std::tuple> which is automatically converted by nanobind's -/// stl/tuple type caster. +/// Wrapper exposed as OptimizationResult so cudaq_runtime.OptimizationResult +/// exists for re-export and type hints. 
optimize() returns a plain tuple +/// (opt_value, opt_params); this type can wrap that for structured access. +struct OptimizationResultPy { + double opt_value = 0.0; + std::vector optimal_parameters; + + OptimizationResultPy() = default; + OptimizationResultPy(double v, std::vector p) + : opt_value(v), optimal_parameters(std::move(p)) {} + explicit OptimizationResultPy(const optimization_result &r) + : opt_value(std::get<0>(r)), optimal_parameters(std::get<1>(r)) {} +}; + void bindOptimizationResult(nanobind::module_ &mod) { - mod.attr("OptimizationResult") = - nanobind::handle(reinterpret_cast(&PyTuple_Type)); + nanobind::class_( + mod, "OptimizationResult", + "Result of an optimization: (opt_value, optimal_parameters). " + "optimize() returns a tuple; this type is for type hints and wrapping.") + .def(nanobind::init>(), + nanobind::arg("opt_value"), nanobind::arg("optimal_parameters")) + .def(nanobind::init(), + "Wrap a tuple (opt_value, optimal_parameters).") + .def_ro("opt_value", &OptimizationResultPy::opt_value) + .def_ro("optimal_parameters", &OptimizationResultPy::optimal_parameters) + .def("__getitem__", + [](const OptimizationResultPy &self, size_t i) -> nanobind::object { + if (i == 0) + return nanobind::cast(self.opt_value); + if (i == 1) + return nanobind::cast(self.optimal_parameters); + throw std::out_of_range("OptimizationResult index out of range"); + }) + .def("__len__", [](const OptimizationResultPy &) { return 2; }); } void bindGradientStrategies(nanobind::module_ &mod) { @@ -156,8 +185,24 @@ nanobind::class_ addPyOptimizer(nanobind::module_ &mod, the optimizer will perform. If not set, the optimizer may run until convergence or until another stopping criterion is met. 
)doc") - .def_rw("initial_parameters", &OptimizerT::initial_parameters, - R"doc( + .def_prop_rw( + "initial_parameters", + [](OptimizerT &self) -> nanobind::object { + if (self.initial_parameters.has_value()) + return nanobind::cast(self.initial_parameters.value()); + return nanobind::none(); + }, + [](OptimizerT &self, nanobind::object vals) { + if (vals.is_none()) { + self.initial_parameters = std::nullopt; + return; + } + std::vector v; + for (auto val : vals) + v.push_back(nanobind::cast(val)); + self.initial_parameters = std::move(v); + }, + R"doc( list[float]: Initial values for the optimization parameters (optional). Provides a starting point for the optimization. If not specified, the @@ -170,7 +215,24 @@ nanobind::class_ addPyOptimizer(nanobind::module_ &mod, optimizer.initial_parameters = [0.5, -0.3, 1.2] )doc") - .def_rw("lower_bounds", &OptimizerT::lower_bounds, R"doc( + .def_prop_rw( + "lower_bounds", + [](OptimizerT &self) -> nanobind::object { + if (self.lower_bounds.has_value()) + return nanobind::cast(self.lower_bounds.value()); + return nanobind::none(); + }, + [](OptimizerT &self, nanobind::object vals) { + if (vals.is_none()) { + self.lower_bounds = std::nullopt; + return; + } + std::vector v; + for (auto val : vals) + v.push_back(nanobind::cast(val)); + self.lower_bounds = std::move(v); + }, + R"doc( list[float]: Lower bounds for optimization parameters (optional). 
Constrains the search space by specifying minimum allowed values for @@ -182,7 +244,24 @@ nanobind::class_ addPyOptimizer(nanobind::module_ &mod, optimizer.lower_bounds = [-2.0, -2.0] # For 2D problem )doc") - .def_rw("upper_bounds", &OptimizerT::upper_bounds, R"doc( + .def_prop_rw( + "upper_bounds", + [](OptimizerT &self) -> nanobind::object { + if (self.upper_bounds.has_value()) + return nanobind::cast(self.upper_bounds.value()); + return nanobind::none(); + }, + [](OptimizerT &self, nanobind::object vals) { + if (vals.is_none()) { + self.upper_bounds = std::nullopt; + return; + } + std::vector v; + for (auto val : vals) + v.push_back(nanobind::cast(val)); + self.upper_bounds = std::move(v); + }, + R"doc( list[float]: Upper bounds for optimization parameters (optional). Constrains the search space by specifying maximum allowed values for diff --git a/python/runtime/cudaq/algorithms/py_resource_count.cpp b/python/runtime/cudaq/algorithms/py_resource_count.cpp index 53af2405cf5..ec52bb03c68 100644 --- a/python/runtime/cudaq/algorithms/py_resource_count.cpp +++ b/python/runtime/cudaq/algorithms/py_resource_count.cpp @@ -10,7 +10,7 @@ #include "common/Resources.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" #include "utils/LinkedLibraryHolder.h" -#include "utils/NanobindAdaptors.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include #include @@ -60,6 +60,7 @@ estimate_resources_impl(const std::string &kernelName, MlirModule kernelMod, } void cudaq::bindCountResources(nanobind::module_ &mod) { - mod.def("estimate_resources_impl", estimate_resources_impl, + mod.def("estimate_resources_impl", estimate_resources_impl, nanobind::arg(), + nanobind::arg(), nanobind::arg().none(), nanobind::arg(), "See python documentation for estimate_resources."); } diff --git a/python/runtime/cudaq/algorithms/py_run.cpp b/python/runtime/cudaq/algorithms/py_run.cpp index fb09b0b8e7a..70eb9cb2986 100644 --- a/python/runtime/cudaq/algorithms/py_run.cpp +++ 
b/python/runtime/cudaq/algorithms/py_run.cpp @@ -11,8 +11,8 @@ #include "cudaq/algorithms/run.h" #include "cudaq_internal/compiler/LayoutInfo.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include #include #include @@ -23,7 +23,6 @@ #include using namespace cudaq; -using namespace cudaq_internal::compiler; static std::vector readRunResults(mlir::ModuleOp module, mlir::Type ty, @@ -72,16 +71,17 @@ pyRunTheKernel(const std::string &name, quantum_platform &platform, // kernels. if (auto vecTy = dyn_cast(returnTy)) { auto elemTy = vecTy.getElementType(); - if (elemTy.isa()) + if (mlir::isa(elemTy)) throw std::runtime_error( "`cudaq.run` does not yet support returning nested `list` from " "entry-point kernels."); - if (elemTy.isa()) + if (mlir::isa(elemTy)) throw std::runtime_error("`cudaq.run` does not yet support returning " "`list` of `dataclass`/`tuple` from " "entry-point kernels."); } - auto layoutInfo = getLayoutInfo(name, mod.getOperation()); + auto layoutInfo = + cudaq_internal::compiler::getLayoutInfo(name, mod.getOperation()); auto results = details::runTheKernel( [&]() mutable { [[maybe_unused]] auto result = clean_launch_module(name, mod, opaques); @@ -242,7 +242,9 @@ run_async_impl(const std::string &shortName, MlirModule module, /// @brief Bind the run cudaq function. void cudaq::bindPyRun(nanobind::module_ &mod) { - mod.def("run_impl", run_impl, + mod.def("run_impl", run_impl, nanobind::arg(), nanobind::arg(), + nanobind::arg(), nanobind::arg().none(), nanobind::arg(), + nanobind::arg(), R"#( Run the provided `kernel` with the given kernel arguments over the specified number of circuit executions (`shots_count`). 
@@ -281,7 +283,9 @@ void cudaq::bindPyRunAsync(nanobind::module_ &mod) { }, "FIXME: documentation goes here"); - mod.def("run_async_impl", run_async_impl, + mod.def("run_async_impl", run_async_impl, nanobind::arg(), nanobind::arg(), + nanobind::arg(), nanobind::arg().none(), nanobind::arg(), + nanobind::arg(), R"#( Run the provided `kernel` with the given kernel arguments over the specified number of circuit executions (`shots_count`) asynchronously on the specified diff --git a/python/runtime/cudaq/algorithms/py_sample_async.cpp b/python/runtime/cudaq/algorithms/py_sample_async.cpp index d11969242f7..3429f7da3fb 100644 --- a/python/runtime/cudaq/algorithms/py_sample_async.cpp +++ b/python/runtime/cudaq/algorithms/py_sample_async.cpp @@ -10,8 +10,8 @@ #include "common/DeviceCodeRegistry.h" #include "cudaq/algorithms/sample.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include "mlir/CAPI/IR.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include @@ -109,5 +109,10 @@ programming pattern. 
}, "FIXME: document"); - mod.def("sample_async_impl", sample_async_impl, "FIXME: document"); + mod.def("sample_async_impl", sample_async_impl, "FIXME: document", + nanobind::arg("short_name"), nanobind::arg("module"), + nanobind::arg("shots_count"), + nanobind::arg("noise_model").none() = std::nullopt, + nanobind::arg("explicit_measurements"), nanobind::arg("qpu_id"), + nanobind::arg("runtime_args")); } diff --git a/python/runtime/cudaq/algorithms/py_sample_ptsbe.cpp b/python/runtime/cudaq/algorithms/py_sample_ptsbe.cpp index 7d0c58e3b16..85126dbcd46 100644 --- a/python/runtime/cudaq/algorithms/py_sample_ptsbe.cpp +++ b/python/runtime/cudaq/algorithms/py_sample_ptsbe.cpp @@ -20,8 +20,8 @@ #include "cudaq/ptsbe/strategies/OrderedSamplingStrategy.h" #include "cudaq/ptsbe/strategies/ProbabilisticSamplingStrategy.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include "mlir/CAPI/IR.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include @@ -36,13 +36,18 @@ using namespace cudaq; /// /// All PTSBE configuration is handled by the Python wrapper /// (cudaq.ptsbe.sample) and passed here as positional parameters. +// nanobind 2.x cannot dispatch NB_TYPE_CASTER-based parameters (MlirModule) +// when nanobind::object appears in the same function signature. Use concrete +// std::optional types for all nullable parameters instead. 
static ptsbe::sample_result pySamplePTSBE(const std::string &shortName, MlirModule module, std::size_t shots_count, noise_model noiseModel, std::optional max_trajectories, - nanobind::object sampling_strategy, - nanobind::object shot_allocation_obj, bool return_execution_data, - bool include_sequential_data, nanobind::args runtimeArgs) { + std::optional> + sampling_strategy, + std::optional shot_allocation, + bool return_execution_data, bool include_sequential_data, + nanobind::args runtimeArgs) { if (shots_count == 0) return ptsbe::sample_result(); @@ -51,14 +56,11 @@ pySamplePTSBE(const std::string &shortName, MlirModule module, ptsbe_options.include_sequential_data = include_sequential_data; ptsbe_options.max_trajectories = max_trajectories; - if (!sampling_strategy.is_none()) - ptsbe_options.strategy = - nanobind::cast>( - sampling_strategy); + if (sampling_strategy) + ptsbe_options.strategy = *sampling_strategy; - if (!shot_allocation_obj.is_none()) - ptsbe_options.shot_allocation = - nanobind::cast(shot_allocation_obj); + if (shot_allocation) + ptsbe_options.shot_allocation = *shot_allocation; auto mod = unwrap(module); runtimeArgs = simplifiedValidateInputArguments(runtimeArgs); @@ -108,26 +110,26 @@ struct AsyncPTSBESampleResultImpl { } // namespace /// @brief Run PTSBE sampling asynchronously from Python. 
-static AsyncPTSBESampleResultImpl pySampleAsyncPTSBE( - const std::string &shortName, MlirModule module, std::size_t shots_count, - noise_model &noiseModel, std::optional max_trajectories, - nanobind::object sampling_strategy, nanobind::object shot_allocation_obj, - bool return_execution_data, bool include_sequential_data, - nanobind::args runtimeArgs) { +static AsyncPTSBESampleResultImpl +pySampleAsyncPTSBE(const std::string &shortName, MlirModule module, + std::size_t shots_count, noise_model &noiseModel, + std::optional max_trajectories, + std::optional> + sampling_strategy, + std::optional shot_allocation, + bool return_execution_data, bool include_sequential_data, + nanobind::args runtimeArgs) { ptsbe::PTSBEOptions ptsbe_options; ptsbe_options.return_execution_data = return_execution_data; ptsbe_options.include_sequential_data = include_sequential_data; ptsbe_options.max_trajectories = max_trajectories; - if (!sampling_strategy.is_none()) - ptsbe_options.strategy = - nanobind::cast>( - sampling_strategy); + if (sampling_strategy) + ptsbe_options.strategy = *sampling_strategy; - if (!shot_allocation_obj.is_none()) - ptsbe_options.shot_allocation = - nanobind::cast(shot_allocation_obj); + if (shot_allocation) + ptsbe_options.shot_allocation = *shot_allocation; auto mod = unwrap(module); runtimeArgs = simplifiedValidateInputArguments(runtimeArgs); @@ -398,14 +400,15 @@ void cudaq::bindSamplePTSBE(nanobind::module_ &mod) { "Block until the PTSBE sampling result is available and return it."); // PTSBE sample implementation - ptsbe.def("sample_impl", pySamplePTSBE, nanobind::arg("kernel_name"), - nanobind::arg("module"), nanobind::arg("shots_count"), - nanobind::arg("noise_model"), nanobind::arg("max_trajectories"), - nanobind::arg("sampling_strategy").none(), - nanobind::arg("shot_allocation").none(), - nanobind::arg("return_execution_data"), - nanobind::arg("include_sequential_data"), - R"pbdoc( + ptsbe.def( + "sample_impl", pySamplePTSBE, 
nanobind::arg("kernel_name"), + nanobind::arg("module"), nanobind::arg("shots_count"), + nanobind::arg("noise_model"), nanobind::arg("max_trajectories").none(), + nanobind::arg("sampling_strategy").none(), + nanobind::arg("shot_allocation").none(), + nanobind::arg("return_execution_data"), + nanobind::arg("include_sequential_data"), nanobind::arg("arguments"), + R"pbdoc( Run PTSBE sampling on the provided kernel. Args: @@ -425,14 +428,14 @@ Run PTSBE sampling on the provided kernel. )pbdoc"); // PTSBE async sample implementation - ptsbe.def("sample_async_impl", pySampleAsyncPTSBE, - nanobind::arg("kernel_name"), nanobind::arg("module"), - nanobind::arg("shots_count"), nanobind::arg("noise_model"), - nanobind::arg("max_trajectories"), - nanobind::arg("sampling_strategy").none(), - nanobind::arg("shot_allocation").none(), - nanobind::arg("return_execution_data"), - nanobind::arg("include_sequential_data"), - "Run PTSBE sampling asynchronously. Returns an " - "AsyncSampleResultImpl."); + ptsbe.def( + "sample_async_impl", pySampleAsyncPTSBE, nanobind::arg("kernel_name"), + nanobind::arg("module"), nanobind::arg("shots_count"), + nanobind::arg("noise_model"), nanobind::arg("max_trajectories").none(), + nanobind::arg("sampling_strategy").none(), + nanobind::arg("shot_allocation").none(), + nanobind::arg("return_execution_data"), + nanobind::arg("include_sequential_data"), nanobind::arg("arguments"), + "Run PTSBE sampling asynchronously. 
Returns an " + "AsyncSampleResultImpl."); } diff --git a/python/runtime/cudaq/algorithms/py_state.cpp b/python/runtime/cudaq/algorithms/py_state.cpp index fff952c31e4..fd62bc02bb8 100644 --- a/python/runtime/cudaq/algorithms/py_state.cpp +++ b/python/runtime/cudaq/algorithms/py_state.cpp @@ -13,39 +13,14 @@ #include "cudaq/algorithms/get_state.h" #include "cudaq/runtime/logger/logger.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" -#include +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include -#include -#include -#include -#include -#include -#include -#include -#include using namespace cudaq; -// FIXME: This is using a thread unsafe global? -/// If we have any implicit device-to-host data transfers we will store that -/// data here and ensure it is deleted properly. -static std::vector>> - hostDataFromDevice; - -namespace { -// CuPy interop helpers. -struct BufferInfo { - void *ptr = nullptr; - std::size_t itemsize = 0; - std::string format; - std::vector shape; - std::vector strides; - bool readonly = false; - std::size_t size = 0; -}; -} // namespace +// Note: Removed unsafe global hostDataFromDevice vector. +// Ownership is now managed via nb::capsule per-array. static nanobind::dict getCupyArrayInterface(nanobind::handle cupyArray) { if (!nanobind::hasattr(cupyArray, "__cuda_array_interface__")) @@ -97,6 +72,21 @@ getCupyComplexTypeInfo(const std::string &typeStr) { ". 
Supported types are: shape; + std::vector strides; + bool readonly = false; + std::size_t size = 0; // total number of elements +}; +} // namespace + static BufferInfo getCupyBufferInfo(nanobind::object cupyArray) { auto cupyArrayInfo = getCupyArrayInterface(cupyArray); auto dataInfo = nanobind::cast(cupyArrayInfo["data"]); @@ -283,10 +273,6 @@ state pyGetStateRemote(nanobind::object kernel, nanobind::args args) { auto kernelMod = nanobind::cast(kernel.attr("qkeModule")); args = simplifiedValidateInputArguments(args); auto *argData = toOpaqueArgs(args, kernelMod, kernelName); -#if 0 - auto [argWrapper, size, returnOffset] = - pyCreateNativeKernel(kernelName, kernelMod, *argData); -#endif return state(new PyRemoteSimulationState(kernelName, /*argWrapper*/ {}, argData, /*size*/ 0, /*returnOffset*/ 0)); @@ -341,6 +327,13 @@ state pyGetStateLibraryMode(nanobind::object kernel, nanobind::args args) { }); } +// Helper to check if object is a CuPy array (has __cuda_array_interface__) +static bool isCupyArray(nanobind::object obj) { + return nanobind::hasattr(obj, "__cuda_array_interface__"); +} + +/// @brief Helper to get BufferInfo from a numpy array via Python buffer +/// protocol. 
static BufferInfo getNumpyBufferInfo(nanobind::object numpy_array) { auto dtype = numpy_array.attr("dtype"); std::string dtypeStr = nanobind::cast(dtype.attr("name")); @@ -365,10 +358,8 @@ static BufferInfo getNumpyBufferInfo(nanobind::object numpy_array) { } auto stridesTuple = nanobind::cast(numpy_array.attr("strides")); - for (std::size_t i = 0; i < stridesTuple.size(); i++) { + for (std::size_t i = 0; i < stridesTuple.size(); i++) info.strides.push_back(nanobind::cast(stridesTuple[i])); - } - // Get the raw data pointer via numpy's ctypes interface info.ptr = reinterpret_cast( nanobind::cast(numpy_array.attr("ctypes").attr("data"))); info.readonly = false; @@ -488,100 +479,67 @@ void cudaq::bindPyState(nanobind::module_ &mod, LinkedLibraryHolder &holder) { "via the `cudaq.get_state(...)` function or the static " "`cudaq.State.from_data()` method.\n") .def( - "__array__", - [](const state &self, nanobind::object dtype_obj, - nanobind::object copy_obj) { + "to_numpy", + [](const state &self) -> nanobind::object { if (self.get_num_tensors() != 1) throw std::runtime_error( "Numpy interop is only supported for vector " "and matrix state data."); - // This method enables interoperability with NumPy array data. - // We must be careful since the state data may actually be on GPU - // device. - - nanobind::module_ np = nanobind::module_::import_("numpy"); auto stateVector = self.get_tensor(); auto precision = self.get_precision(); - auto shape = self.get_tensor().extents; - - // Determine numpy dtype - nanobind::object np_dtype = - precision == SimulationState::precision::fp32 - ? 
np.attr("complex64") - : np.attr("complex128"); + std::vector shape(stateVector.extents.begin(), + stateVector.extents.end()); if (self.is_on_gpu()) { - // This is device data, transfer to host auto numElements = stateVector.get_num_elements(); - nanobind::object arr; + if (precision == SimulationState::precision::fp32) { auto *hostData = new std::complex[numElements]; self.to_host(hostData, numElements); - // Create numpy array and copy data - if (shape.size() != 1) { - nanobind::tuple np_shape = - nanobind::make_tuple(shape[0], shape[1]); - arr = np.attr("empty")(np_shape, np_dtype); - } else { - nanobind::tuple np_shape = nanobind::make_tuple(shape[0]); - arr = np.attr("empty")(np_shape, np_dtype); - } - auto *destPtr = reinterpret_cast *>( - nanobind::cast(arr.attr("ctypes").attr("data"))); - std::memcpy(destPtr, hostData, - numElements * sizeof(std::complex)); - delete[] hostData; + + nanobind::capsule owner(hostData, [](void *p) noexcept { + CUDAQ_INFO("freeing data that was copied from GPU device " + "for compatibility with NumPy"); + delete[] static_cast *>(p); + }); + + return nanobind::cast( + nanobind::ndarray>( + hostData, shape.size(), shape.data(), owner)); } else { auto *hostData = new std::complex[numElements]; self.to_host(hostData, numElements); - if (shape.size() != 1) { - nanobind::tuple np_shape = - nanobind::make_tuple(shape[0], shape[1]); - arr = np.attr("empty")(np_shape, np_dtype); - } else { - nanobind::tuple np_shape = nanobind::make_tuple(shape[0]); - arr = np.attr("empty")(np_shape, np_dtype); - } - auto *destPtr = reinterpret_cast *>( - nanobind::cast(arr.attr("ctypes").attr("data"))); - std::memcpy(destPtr, hostData, - numElements * sizeof(std::complex)); - delete[] hostData; - } - return arr; - } - // Host data path - wrap existing memory - void *dataPtr = self.get_tensor().data; - auto numElements = stateVector.get_num_elements(); - if (shape.size() != 1) { - nanobind::tuple np_shape = - nanobind::make_tuple(shape[0], shape[1]); - 
// Use np.frombuffer-like approach: create array from pointer - nanobind::object arr = np.attr("empty")(np_shape, np_dtype); - auto *destPtr = reinterpret_cast( - nanobind::cast(arr.attr("ctypes").attr("data"))); - std::size_t dataTypeSize = - precision == SimulationState::precision::fp32 - ? sizeof(std::complex) - : sizeof(std::complex); - std::memcpy(destPtr, dataPtr, numElements * dataTypeSize); - return arr; + nanobind::capsule owner(hostData, [](void *p) noexcept { + CUDAQ_INFO("freeing data that was copied from GPU device " + "for compatibility with NumPy"); + delete[] static_cast *>(p); + }); + + return nanobind::cast( + nanobind::ndarray>( + hostData, shape.size(), shape.data(), owner)); + } + } else { + if (precision == SimulationState::precision::fp32) { + return nanobind::cast( + nanobind::ndarray>( + stateVector.data, shape.size(), shape.data(), + nanobind::handle())); + } else { + return nanobind::cast( + nanobind::ndarray>( + stateVector.data, shape.size(), shape.data(), + nanobind::handle())); + } } - nanobind::tuple np_shape = nanobind::make_tuple(shape[0]); - nanobind::object arr = np.attr("empty")(np_shape, np_dtype); - auto *destPtr = reinterpret_cast( - nanobind::cast(arr.attr("ctypes").attr("data"))); - std::size_t dataTypeSize = - precision == SimulationState::precision::fp32 - ? 
sizeof(std::complex) - : sizeof(std::complex); - std::memcpy(destPtr, dataPtr, numElements * dataTypeSize); - return arr; }, - nanobind::arg("dtype") = nanobind::none(), - nanobind::arg("copy") = nanobind::none()) + "Convert to a NumPy array.") + .def("__array__", + [](nanobind::object self, nanobind::args, nanobind::kwargs) { + return self.attr("to_numpy")(); + }) .def( "__len__", [](state &self) { @@ -651,7 +609,6 @@ void cudaq::bindPyState(nanobind::module_ &mod, LinkedLibraryHolder &holder) { [](const std::vector &tensors) { TensorStateData tensorData; for (auto &tensor : tensors) { - tensorData.emplace_back( std::pair>{ tensor.data, tensor.extents}); @@ -659,6 +616,27 @@ void cudaq::bindPyState(nanobind::module_ &mod, LinkedLibraryHolder &holder) { return state::from_data(tensorData); }, "Return a state from matrix product state tensor data.") + .def_static( + "from_data", + [&holder](const std::vector &tensors) { + const bool isHostData = tensors.empty() || !isCupyArray(tensors[0]); + if (!holder.getTarget().config.GpuRequired && !isHostData) + throw std::runtime_error(fmt::format( + "Current target '{}' does not support CuPy arrays.", + holder.getTarget().name)); + TensorStateData tensorData; + for (auto &tensor : tensors) { + auto arr = nanobind::cast>(tensor); + std::vector extents; + for (size_t i = 0; i < arr.ndim(); ++i) + extents.push_back(arr.shape(i)); + tensorData.emplace_back( + std::pair>{arr.data(), + extents}); + } + return state::from_data(tensorData); + }, + "Return a state from matrix product state tensor data.") .def_static( "from_data", [](const nanobind::list &tensors) { @@ -667,7 +645,7 @@ void cudaq::bindPyState(nanobind::module_ &mod, LinkedLibraryHolder &holder) { // for cupy arrays (implementing Python array interface), may be // overshadowed by any std::vector overloads. 
TensorStateData tensorData; - for (auto tensor : tensors) { + for (nanobind::handle tensor : tensors) { // Make sure this is a CuPy array if (!nanobind::hasattr(tensor, "data")) throw std::runtime_error( @@ -681,8 +659,8 @@ void cudaq::bindPyState(nanobind::module_ &mod, LinkedLibraryHolder &holder) { // We know this is a cupy device pointer. Start by ensuring it is // of proper complex type - auto typeStr = nanobind::cast( - tensor.attr("dtype").attr("name")); + auto typeStr = + std::string(nanobind::str(tensor.attr("dtype")).c_str()); if (typeStr != "complex128") throw std::runtime_error( "invalid from_data operation on nanobind::object tensors - " @@ -719,8 +697,8 @@ void cudaq::bindPyState(nanobind::module_ &mod, LinkedLibraryHolder &holder) { // We know this is a cupy device pointer. Start by ensuring it is of // complex type - auto typeStr = nanobind::cast( - opaqueData.attr("dtype").attr("name")); + auto typeStr = + std::string(nanobind::str(opaqueData.attr("dtype")).c_str()); if (typeStr.find("float") != std::string::npos) throw std::runtime_error( "CuPy array with only floating point elements passed to " @@ -848,7 +826,7 @@ index pair. [](state &self) { std::stringstream ss; self.dump(ss); - nanobind::print(ss.str().c_str()); + nanobind::module_::import_("builtins").attr("print")(ss.str()); }, "Print the state to the console.") .def("__str__", @@ -863,7 +841,7 @@ index pair. "Compute the overlap between the provided :class:`State`'s.") .def( "overlap", - [&holder](state &self, nanobind::object &other) { + [&holder](state &self, nanobind::object other) { if (self.get_num_tensors() != 1) throw std::runtime_error("overlap NumPy interop only supported " "for vector and matrix state data."); @@ -891,7 +869,7 @@ index pair. 
// Start by ensuring it is of complex type auto typeStr = - nanobind::cast(other.attr("dtype").attr("name")); + std::string(nanobind::str(other.attr("dtype")).c_str()); if (typeStr.find("float") != std::string::npos) throw std::runtime_error( "CuPy array with only floating point elements passed to " diff --git a/python/runtime/cudaq/algorithms/py_translate.cpp b/python/runtime/cudaq/algorithms/py_translate.cpp index 86faaa47c79..3c0cdc8e8a5 100644 --- a/python/runtime/cudaq/algorithms/py_translate.cpp +++ b/python/runtime/cudaq/algorithms/py_translate.cpp @@ -14,8 +14,10 @@ #include "cudaq/runtime/logger/logger.h" #include "cudaq_internal/compiler/TracePassInstrumentation.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include "mlir/Pass/PassManager.h" #include "mlir/Target/LLVMIR/Export.h" @@ -54,7 +56,7 @@ static std::string translate_impl(const std::string &shortName, cudaq::marshal_arguments_for_module_launch(mod, runtimeArguments, fn); return StringSwitch>(formatPair.first) - .Cases("qir", "qir-full", "qir-adaptive", "qir-base", + .Cases({"qir", "qir-full", "qir-adaptive", "qir-base"}, [&]() { return cudaq::detail::lower_to_qir_llvm(shortName, mod, opaques, format); @@ -94,7 +96,6 @@ void cudaq::bindPyTranslate(nanobind::module_ &mod) { if (failed(pm.run(mod))) throw std::runtime_error("Conversion to " + format + " failed."); llvm::LLVMContext llvmContext; - llvmContext.setOpaquePointers(false); std::unique_ptr llvmModule = translateModuleToLLVMIR(mod, llvmContext); if (!llvmModule) diff --git a/python/runtime/cudaq/algorithms/py_unitary.cpp b/python/runtime/cudaq/algorithms/py_unitary.cpp index 3aefbbc957d..5d67ee17a01 100644 --- a/python/runtime/cudaq/algorithms/py_unitary.cpp +++ b/python/runtime/cudaq/algorithms/py_unitary.cpp @@ -10,7 +10,7 @@ #include 
"cudaq/algorithms/unitary.h" #include "runtime/cudaq/operators/py_helpers.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" -#include "utils/NanobindAdaptors.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" using namespace cudaq; @@ -24,7 +24,7 @@ static nanobind::object get_unitary_impl(const std::string &shortName, // Return as numpy array (dim, dim), complex128 auto temp = contrib::get_unitary_cmat(std::move(f)); - return nanobind::cast(details::cmat_to_numpy(temp)); + return details::cmat_to_numpy(temp); } /// Bind the get_unitary cudaq function diff --git a/python/runtime/cudaq/algorithms/py_utils.cpp b/python/runtime/cudaq/algorithms/py_utils.cpp index e396f93c3a5..069dc67c154 100644 --- a/python/runtime/cudaq/algorithms/py_utils.cpp +++ b/python/runtime/cudaq/algorithms/py_utils.cpp @@ -25,16 +25,17 @@ nanobind::dict get_serializable_var_dict() { auto key = item.first; auto value = item.second; - if (nanobind::cast(key).starts_with("__")) { + std::string keyStr(nanobind::str(key).c_str()); + if (keyStr.starts_with("__")) { // Ignore items that start with "__" (like Python __builtins__, etc.) } else if (nanobind::hasattr(value, "to_json")) { - auto type = value.type(); - std::string module = - nanobind::cast(type.attr("__module__")); - std::string name = nanobind::cast(type.attr("__name__")); + auto type = nanobind::handle( + reinterpret_cast(Py_TYPE(value.ptr()))); + std::string module(nanobind::str(type.attr("__module__")).c_str()); + std::string name(nanobind::str(type.attr("__name__")).c_str()); auto type_name = nanobind::str((module + "." + name).c_str()); - auto json_key_name = nanobind::str(nanobind::str(key).c_str()) + - nanobind::str("/") + type_name; + nanobind::str json_key_name( + (keyStr + "/" + module + "." 
+ name).c_str()); serialized_dict[json_key_name] = json.attr("loads")(value.attr("to_json")()); } else if (nanobind::hasattr(value, "tolist")) { @@ -44,12 +45,7 @@ nanobind::dict get_serializable_var_dict() { serialized_dict[key] = json.attr("loads")(json.attr("dumps")(value)); } } catch (const nanobind::python_error &e) { - // Uncomment the following lines for debug, but all this really means is - // that we won't send this to the remote server. - - // std::cout << "Failed to serialize key '" - // << nanobind::cast(item.first) - // << "' : " + std::string(e.what()) << std::endl; + // Serialization failures are non-fatal - we just skip the entry. } }; @@ -60,7 +56,7 @@ nanobind::dict get_serializable_var_dict() { std::vector frame_vec; auto current_frame = inspect.attr("currentframe")(); while (current_frame && !current_frame.is_none()) { - frame_vec.push_back(current_frame); + frame_vec.push_back(nanobind::object(current_frame)); current_frame = current_frame.attr("f_back"); } @@ -68,8 +64,7 @@ nanobind::dict get_serializable_var_dict() { // globals first to locals last. This ensures that the overwrites give // precedence to closest-to-locals. 
for (auto it = frame_vec.rbegin(); it != frame_vec.rend(); ++it) { - nanobind::dict f_locals = - nanobind::cast(it->attr("f_locals")); + nanobind::dict f_locals = it->attr("f_locals"); for (const auto item : f_locals) try_to_add_item(item); } @@ -133,20 +128,18 @@ std::string get_var_name_for_handle(const nanobind::handle &h) { // Search locals first, walking up the call stack auto current_frame = inspect.attr("currentframe")(); while (current_frame && !current_frame.is_none()) { - nanobind::dict f_locals = - nanobind::cast(current_frame.attr("f_locals")); + nanobind::dict f_locals = current_frame.attr("f_locals"); for (auto item : f_locals) if (item.second.is(h)) - return nanobind::cast(nanobind::str(item.first)); + return std::string(nanobind::str(item.first).c_str()); current_frame = current_frame.attr("f_back"); } // Search globals now current_frame = inspect.attr("currentframe")(); - nanobind::dict f_globals = - nanobind::cast(current_frame.attr("f_globals")); + nanobind::dict f_globals = current_frame.attr("f_globals"); for (auto item : f_globals) if (item.second.is(h)) - return nanobind::cast(nanobind::str(item.first)); + return std::string(nanobind::str(item.first).c_str()); return std::string(); } @@ -163,6 +156,18 @@ void bindPyDataClassRegistry(nanobind::module_ &mod) { "Is class registered\n") .def_static("getClassAttributes", &DataClassRegistry::getClassAttributes, "Find registered class and its attributes\n") - .def_ro_static("classes", &DataClassRegistry::classes); + .def_static( + "get_classes", + []() -> decltype(DataClassRegistry::classes) & { + return DataClassRegistry::classes; + }, + nanobind::rv_policy::reference, "Get all registered classes.") + .def_prop_ro_static( + "classes", + [](nanobind::handle /*cls*/) + -> decltype(DataClassRegistry::classes) & { + return DataClassRegistry::classes; + }, + nanobind::rv_policy::reference, "Get all registered classes."); } } // namespace cudaq diff --git 
a/python/runtime/cudaq/domains/plugins/CMakeLists.txt b/python/runtime/cudaq/domains/plugins/CMakeLists.txt index 3bd2e991655..f92505aa221 100644 --- a/python/runtime/cudaq/domains/plugins/CMakeLists.txt +++ b/python/runtime/cudaq/domains/plugins/CMakeLists.txt @@ -17,10 +17,13 @@ add_library(cudaq-pyscf SHARED PySCFDriver.cpp) target_compile_options(cudaq-pyscf PRIVATE -Wno-cast-qual) +target_include_directories(cudaq-pyscf PRIVATE + ${Python3_INCLUDE_DIRS} +) if (SKBUILD) target_link_libraries(cudaq-pyscf PRIVATE - nanobind-static Python::Module + nanobind-static Python3::Module cudaq-chemistry cudaq-operator cudaq cudaq-py-utils cudaq-platform-default) # Apple's linker (ld64) doesn't support --unresolved-symbols flag if (NOT APPLE) @@ -28,12 +31,9 @@ if (SKBUILD) PRIVATE -Wl,--unresolved-symbols=ignore-in-object-files) endif() else() - if (NOT Python_FOUND) - message(FATAL_ERROR "find_package(Python) not run?") - endif() target_link_libraries(cudaq-pyscf PRIVATE - nanobind-static Python::Python + nanobind-static Python3::Python cudaq-chemistry cudaq-operator cudaq cudaq-py-utils cudaq-platform-default) endif() diff --git a/python/runtime/cudaq/dynamics/CMakeLists.txt b/python/runtime/cudaq/dynamics/CMakeLists.txt index d7910fdf586..b4f9b49fd43 100644 --- a/python/runtime/cudaq/dynamics/CMakeLists.txt +++ b/python/runtime/cudaq/dynamics/CMakeLists.txt @@ -6,14 +6,31 @@ # the terms of the Apache License 2.0 which accompanies this distribution. 
# # ============================================================================ # -find_package(Python COMPONENTS Interpreter Development) +find_package(CUDAToolkit REQUIRED) -nanobind_add_module(nvqir_dynamics_bindings NB_STATIC pyDynamics.cpp) +nanobind_add_module(nvqir_dynamics_bindings + NB_SHARED + NB_DOMAIN cudaq + pyDynamics.cpp) +target_include_directories(nvqir_dynamics_bindings PRIVATE + ${Python3_INCLUDE_DIRS} + ${nanobind_INCLUDE_DIR} +) +find_file(CUDENSITYMAT_INC + NAMES cudensitymat.h + HINTS + $ENV{CUQUANTUM_INSTALL_PREFIX}/include + /usr/include + ENV CPATH + REQUIRED +) +get_filename_component(CUDENSITYMAT_INCLUDE_DIR ${CUDENSITYMAT_INC} DIRECTORY) target_include_directories(nvqir_dynamics_bindings PRIVATE ${CMAKE_SOURCE_DIR}/runtime - ${CMAKE_SOURCE_DIR}/runtime/nvqir/cudensitymat + ${CMAKE_SOURCE_DIR}/runtime/nvqir/cudensitymat + ${CUDENSITYMAT_INCLUDE_DIR} ${CUDAToolkit_INCLUDE_DIRS}) target_link_libraries(nvqir_dynamics_bindings PRIVATE cudaq-logger @@ -29,12 +46,12 @@ endif() if(NOT SKBUILD) set_target_properties(nvqir_dynamics_bindings PROPERTIES - INSTALL_RPATH "${_origin_prefix}/../../lib;${_origin_prefix}/../../lib/plugins" + INSTALL_RPATH "${_origin_prefix}/../../lib;${_origin_prefix}/../../lib/plugins;${_origin_prefix}/../mlir/_mlir_libs" BUILD_RPATH "${CMAKE_BINARY_DIR}/lib" ) else() set_target_properties(nvqir_dynamics_bindings PROPERTIES - INSTALL_RPATH "${_origin_prefix}/../../lib;${_origin_prefix}/../../cuda_quantum.libs" + INSTALL_RPATH "${_origin_prefix}/../../lib;${_origin_prefix}/../../cuda_quantum.libs;${_origin_prefix}/../mlir/_mlir_libs" BUILD_RPATH "${CMAKE_BINARY_DIR}/lib" ) endif() diff --git a/python/runtime/cudaq/operators/py_boson_op.cpp b/python/runtime/cudaq/operators/py_boson_op.cpp index 6df75bd5a27..514ab1fcbb0 100644 --- a/python/runtime/cudaq/operators/py_boson_op.cpp +++ b/python/runtime/cudaq/operators/py_boson_op.cpp @@ -7,12 +7,11 @@ 
******************************************************************************/ #include -#include #include #include #include #include -#include +#include #include #include #include @@ -108,11 +107,11 @@ void bindBosonOperator(nanobind::module_ &mod) { .def( "__iter__", [](boson_op &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -166,13 +165,15 @@ void bindBosonOperator(nanobind::module_ &mod) { .def( "to_matrix", - [](const boson_op &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const boson_op &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." 
"The matrix is ordered according to the convention (endianness) " @@ -182,28 +183,39 @@ void bindBosonOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_matrix", - [](const boson_op &self, dimension_map &dimensions, bool invert_order, - const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const boson_op &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. 
" "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const boson_op &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") .def( "to_sparse_matrix", - [](const boson_op &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - return self.to_sparse_matrix(dimensions, params, invert_order); + [](const boson_op &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + return self.to_sparse_matrix(dims, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Return the sparse matrix representation of the operator. 
This " "representation is a " @@ -217,13 +229,12 @@ void bindBosonOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_sparse_matrix", - [](const boson_op &self, dimension_map &dimensions, bool invert_order, - const nanobind::kwargs &kwargs) { - return self.to_sparse_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const boson_op &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + return self.to_sparse_matrix(dimensions, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Return the sparse matrix representation of the operator. This " "representation is a " "`Tuple[list[complex], list[int], list[int]]`, encoding the " @@ -280,6 +291,7 @@ void bindBosonOperator(nanobind::module_ &mod) { .def(nanobind::self -= boson_op_term(), nanobind::is_operator()) .def(nanobind::self *= nanobind::self, nanobind::is_operator()) .def(nanobind::self += nanobind::self, nanobind::is_operator()) +// see issue https://github.com/pybind/pybind11/issues/1893 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wself-assign-overloaded" @@ -361,17 +373,21 @@ void bindBosonOperator(nanobind::module_ &mod) { .def("dump", &boson_op::dump, "Prints the string representation of the operator to the standard " "output.") - .def("trim", &boson_op::trim, nanobind::arg("tol") = 0.0, - nanobind::arg("parameters") = parameter_map(), - "Removes all terms from the sum for which the absolute value of the " - "coefficient is below " - "the given tolerance.") .def( "trim", - [](boson_op &self, double tol, const nanobind::kwargs &kwargs) { + [](boson_op &self, double tol, std::optional params) { + return self.trim(tol, params.value_or(parameter_map())); + }, + nanobind::arg("tol") = 0.0, + 
nanobind::arg("parameters").none() = nanobind::none(), + "Removes all terms from the sum for which the absolute value of the " + "coefficient is below " + "the given tolerance.") + .def( + "trim", + [](boson_op &self, double tol, nanobind::kwargs kwargs) { return self.trim(tol, details::kwargs_to_param_map(kwargs)); }, - nanobind::arg("tol") = 0.0, nanobind::arg("kwargs"), "Removes all terms from the sum for which the absolute value of the " "coefficient is below " "the given tolerance.") @@ -396,11 +412,11 @@ void bindBosonOperator(nanobind::module_ &mod) { .def( "__iter__", [](boson_op_term &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -476,20 +492,26 @@ void bindBosonOperator(nanobind::module_ &mod) { // evaluations - .def("evaluate_coefficient", &boson_op_term::evaluate_coefficient, - nanobind::arg("parameters") = parameter_map(), - "Returns the evaluated coefficient of the product operator. The " - "parameters is a map of parameter names to their concrete, complex " - "values.") + .def( + "evaluate_coefficient", + [](const boson_op_term &self, std::optional params) { + return self.evaluate_coefficient(params.value_or(parameter_map())); + }, + nanobind::arg("parameters").none() = nanobind::none(), + "Returns the evaluated coefficient of the product operator. 
The " + "parameters is a map of parameter names to their concrete, complex " + "values.") .def( "to_matrix", - [](const boson_op_term &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const boson_op_term &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " @@ -499,28 +521,39 @@ void bindBosonOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_matrix", - [](const boson_op_term &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const boson_op_term &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. 
" "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const boson_op_term &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") .def( "to_sparse_matrix", - [](const boson_op_term &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - return self.to_sparse_matrix(dimensions, params, invert_order); + [](const boson_op_term &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + return self.to_sparse_matrix(dims, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Return the sparse matrix representation of the operator. 
This " "representation is a " @@ -534,13 +567,12 @@ void bindBosonOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_sparse_matrix", - [](const boson_op_term &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - return self.to_sparse_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const boson_op_term &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + return self.to_sparse_matrix(dimensions, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Return the sparse matrix representation of the operator. This " "representation is a " "`Tuple[list[complex], list[int], list[int]]`, encoding the " @@ -636,8 +668,8 @@ void bindBosonOperator(nanobind::module_ &mod) { .def("is_identity", &boson_op_term::is_identity, "Checks if all operators in the product are the identity. 
" - "Note that this function returns true regardless of the value of " - "the coefficient.") + "Note: this function returns true regardless of the value of the " + "coefficient.") .def( "__str__", [](const boson_op_term &self) { return self.to_string(); }, "Returns the string representation of the operator.") diff --git a/python/runtime/cudaq/operators/py_fermion_op.cpp b/python/runtime/cudaq/operators/py_fermion_op.cpp index 621f39c873f..c53c00ce56e 100644 --- a/python/runtime/cudaq/operators/py_fermion_op.cpp +++ b/python/runtime/cudaq/operators/py_fermion_op.cpp @@ -7,13 +7,11 @@ ******************************************************************************/ #include -#include #include #include #include -#include #include -#include +#include #include #include #include @@ -104,11 +102,11 @@ void bindFermionOperator(nanobind::module_ &mod) { .def( "__iter__", [](fermion_op &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -162,13 +160,15 @@ void bindFermionOperator(nanobind::module_ &mod) { .def( "to_matrix", - [](const fermion_op &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const fermion_op &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = 
nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " @@ -178,28 +178,39 @@ void bindFermionOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_matrix", - [](const fermion_op &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const fermion_op &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. 
" "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const fermion_op &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") .def( "to_sparse_matrix", - [](const fermion_op &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - return self.to_sparse_matrix(dimensions, params, invert_order); + [](const fermion_op &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + return self.to_sparse_matrix(dims, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Return the sparse matrix representation of the operator. 
This " "representation is a " @@ -213,13 +224,12 @@ void bindFermionOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_sparse_matrix", - [](const fermion_op &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - return self.to_sparse_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const fermion_op &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + return self.to_sparse_matrix(dimensions, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Return the sparse matrix representation of the operator. This " "representation is a " "`Tuple[list[complex], list[int], list[int]]`, encoding the " @@ -276,6 +286,7 @@ void bindFermionOperator(nanobind::module_ &mod) { .def(nanobind::self -= fermion_op_term(), nanobind::is_operator()) .def(nanobind::self *= nanobind::self, nanobind::is_operator()) .def(nanobind::self += nanobind::self, nanobind::is_operator()) +// see issue https://github.com/pybind/pybind11/issues/1893 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wself-assign-overloaded" @@ -357,17 +368,22 @@ void bindFermionOperator(nanobind::module_ &mod) { .def("dump", &fermion_op::dump, "Prints the string representation of the operator to the standard " "output.") - .def("trim", &fermion_op::trim, nanobind::arg("tol") = 0.0, - nanobind::arg("parameters") = parameter_map(), - "Removes all terms from the sum for which the absolute value of the " - "coefficient is below " - "the given tolerance.") .def( "trim", - [](fermion_op &self, double tol, const nanobind::kwargs &kwargs) { + [](fermion_op &self, double tol, + std::optional params) { + return self.trim(tol, params.value_or(parameter_map())); + }, + nanobind::arg("tol") = 0.0, + 
nanobind::arg("parameters").none() = nanobind::none(), + "Removes all terms from the sum for which the absolute value of the " + "coefficient is below " + "the given tolerance.") + .def( + "trim", + [](fermion_op &self, double tol, nanobind::kwargs kwargs) { return self.trim(tol, details::kwargs_to_param_map(kwargs)); }, - nanobind::arg("tol") = 0.0, nanobind::arg("kwargs"), "Removes all terms from the sum for which the absolute value of the " "coefficient is below " "the given tolerance.") @@ -392,11 +408,11 @@ void bindFermionOperator(nanobind::module_ &mod) { .def( "__iter__", [](fermion_op_term &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -473,20 +489,27 @@ void bindFermionOperator(nanobind::module_ &mod) { // evaluations - .def("evaluate_coefficient", &fermion_op_term::evaluate_coefficient, - nanobind::arg("parameters") = parameter_map(), - "Returns the evaluated coefficient of the product operator. The " - "parameters is a map of parameter names to their concrete, complex " - "values.") + .def( + "evaluate_coefficient", + [](const fermion_op_term &self, std::optional params) { + return self.evaluate_coefficient(params.value_or(parameter_map())); + }, + nanobind::arg("parameters").none() = nanobind::none(), + "Returns the evaluated coefficient of the product operator. 
The " + "parameters is a map of parameter names to their concrete, complex " + "values.") .def( "to_matrix", - [](const fermion_op_term &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const fermion_op_term &self, + std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " @@ -496,28 +519,40 @@ void bindFermionOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_matrix", - [](const fermion_op_term &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const fermion_op_term &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. 
" "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const fermion_op_term &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") .def( "to_sparse_matrix", - [](const fermion_op_term &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - return self.to_sparse_matrix(dimensions, params, invert_order); + [](const fermion_op_term &self, + std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + return self.to_sparse_matrix(dims, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Return the sparse matrix representation of the operator. 
This " "representation is a " @@ -531,13 +566,12 @@ void bindFermionOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_sparse_matrix", - [](const fermion_op_term &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - return self.to_sparse_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const fermion_op_term &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + return self.to_sparse_matrix(dimensions, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Return the sparse matrix representation of the operator. This " "representation is a " "`Tuple[list[complex], list[int], list[int]]`, encoding the " @@ -633,8 +667,8 @@ void bindFermionOperator(nanobind::module_ &mod) { .def("is_identity", &fermion_op_term::is_identity, "Checks if all operators in the product are the identity. 
" - "Note that this function returns true regardless of the value of " - "the coefficient.") + "Note: this function returns true regardless of the value of the " + "coefficient.") .def( "__str__", [](const fermion_op_term &self) { return self.to_string(); }, diff --git a/python/runtime/cudaq/operators/py_handlers.cpp b/python/runtime/cudaq/operators/py_handlers.cpp index e8c2147e92b..ba44cc90d5f 100644 --- a/python/runtime/cudaq/operators/py_handlers.cpp +++ b/python/runtime/cudaq/operators/py_handlers.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -75,23 +76,27 @@ void bindOperatorHandlers(nanobind::module_ &mod) { "Returns the string representation of the operator.") .def( "to_matrix", - [](const matrix_handler &self, dimension_map &dimensions, - const parameter_map ¶ms) { - auto cmat = self.to_matrix(dimensions, params); + [](const matrix_handler &self, + std::optional dimensions, + std::optional params) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions") = nanobind::none(), + nanobind::arg("parameters") = nanobind::none(), "Returns the matrix representation of the operator.") .def( "to_matrix", - [](const matrix_handler &self, dimension_map &dimensions, - const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix(dimensions, - details::kwargs_to_param_map(kwargs)); + [](const matrix_handler &self, + std::optional dimensions, nanobind::kwargs kwargs) { + dimension_map dims = dimensions.value_or(dimension_map()); + auto cmat = + self.to_matrix(dims, details::kwargs_to_param_map(kwargs)); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), + nanobind::arg("dimensions") = nanobind::none(), nanobind::arg("kwargs"), 
"Returns the matrix representation of the operator.") @@ -100,7 +105,7 @@ void bindOperatorHandlers(nanobind::module_ &mod) { "_define", [](std::string operator_id, std::vector expected_dimensions, const matrix_callback &func, bool overwrite, - const nanobind::kwargs &kwargs) { + nanobind::kwargs kwargs) { // we need to make sure the python function that is stored in // the static dictionary containing the operator definitions // is properly cleaned up - otherwise python will hang on exit... @@ -136,23 +141,26 @@ void bindOperatorHandlers(nanobind::module_ &mod) { "Returns the string representation of the operator.") .def( "to_matrix", - [](const boson_handler &self, dimension_map &dimensions, - const parameter_map ¶ms) { - auto cmat = self.to_matrix(dimensions, params); + [](const boson_handler &self, std::optional dimensions, + std::optional params) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions") = nanobind::none(), + nanobind::arg("parameters") = nanobind::none(), "Returns the matrix representation of the operator.") .def( "to_matrix", - [](const boson_handler &self, dimension_map &dimensions, - const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix(dimensions, - details::kwargs_to_param_map(kwargs)); + [](const boson_handler &self, std::optional dimensions, + nanobind::kwargs kwargs) { + dimension_map dims = dimensions.value_or(dimension_map()); + auto cmat = + self.to_matrix(dims, details::kwargs_to_param_map(kwargs)); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), + nanobind::arg("dimensions") = nanobind::none(), nanobind::arg("kwargs"), "Returns the matrix representation of the operator."); @@ -171,23 +179,27 @@ void 
bindOperatorHandlers(nanobind::module_ &mod) { "Returns the string representation of the operator.") .def( "to_matrix", - [](const fermion_handler &self, dimension_map &dimensions, - const parameter_map ¶ms) { - auto cmat = self.to_matrix(dimensions, params); + [](const fermion_handler &self, + std::optional dimensions, + std::optional params) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions") = nanobind::none(), + nanobind::arg("parameters") = nanobind::none(), "Returns the matrix representation of the operator.") .def( "to_matrix", - [](const fermion_handler &self, dimension_map &dimensions, - const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix(dimensions, - details::kwargs_to_param_map(kwargs)); + [](const fermion_handler &self, + std::optional dimensions, nanobind::kwargs kwargs) { + dimension_map dims = dimensions.value_or(dimension_map()); + auto cmat = + self.to_matrix(dims, details::kwargs_to_param_map(kwargs)); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), + nanobind::arg("dimensions") = nanobind::none(), nanobind::arg("kwargs"), "Returns the matrix representation of the operator."); @@ -208,23 +220,26 @@ void bindOperatorHandlers(nanobind::module_ &mod) { "Returns the string representation of the operator.") .def( "to_matrix", - [](const spin_handler &self, dimension_map &dimensions, - const parameter_map ¶ms) { - auto cmat = self.to_matrix(dimensions, params); + [](const spin_handler &self, std::optional dimensions, + std::optional params) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm); return details::cmat_to_numpy(cmat); 
}, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions") = nanobind::none(), + nanobind::arg("parameters") = nanobind::none(), "Returns the matrix representation of the operator.") .def( "to_matrix", - [](const spin_handler &self, dimension_map &dimensions, - const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix(dimensions, - details::kwargs_to_param_map(kwargs)); + [](const spin_handler &self, std::optional dimensions, + nanobind::kwargs kwargs) { + dimension_map dims = dimensions.value_or(dimension_map()); + auto cmat = + self.to_matrix(dims, details::kwargs_to_param_map(kwargs)); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), + nanobind::arg("dimensions") = nanobind::none(), nanobind::arg("kwargs"), "Returns the matrix representation of the operator."); } diff --git a/python/runtime/cudaq/operators/py_helpers.cpp b/python/runtime/cudaq/operators/py_helpers.cpp index b34212bce6e..e14ac5a1750 100644 --- a/python/runtime/cudaq/operators/py_helpers.cpp +++ b/python/runtime/cudaq/operators/py_helpers.cpp @@ -8,7 +8,6 @@ #include "py_helpers.h" #include "cudaq/operators.h" -#include #include #include #include @@ -27,6 +26,14 @@ cudaq::parameter_map kwargs_to_param_map(const nanobind::kwargs &kwargs) { return params; } +cudaq::parameter_map kwargs_to_param_map(nanobind::kwargs &kwargs, + bool &invert_order) { + nanobind::str invert_key("invert_order"); + nanobind::object inv = kwargs.attr("pop")(invert_key, nanobind::bool_(false)); + invert_order = nanobind::cast(inv); + return kwargs_to_param_map(static_cast(kwargs)); +} + std::unordered_map kwargs_to_param_description(const nanobind::kwargs &kwargs) { std::unordered_map param_desc; @@ -39,23 +46,18 @@ kwargs_to_param_description(const nanobind::kwargs &kwargs) { return param_desc; } -nanobind::ndarray> -cmat_to_numpy(complex_matrix &cmat) { +nanobind::object cmat_to_numpy(complex_matrix &cmat) 
{ auto rows = cmat.rows(); auto cols = cmat.cols(); - auto *src = cmat.get_data(complex_matrix::order::row_major); - std::size_t n = rows * cols; - std::size_t shape[2] = {rows, cols}; - - auto *copy = new std::complex[n]; - std::copy(src, src + n, copy); - - nanobind::capsule owner(copy, [](void *p) noexcept { - delete[] static_cast *>(p); - }); - - return nanobind::ndarray>(copy, 2, - shape, owner); -} + auto *data = cmat.get_data(complex_matrix::order::row_major); + + // Use .cast() to force immediate creation of the numpy array. + // Since no owner is specified, rv_policy::automatic will copy the data, + // making this safe even when cmat is a temporary (e.g. in get_unitary). + return nanobind::ndarray, + nanobind::shape<-1, -1>>(data, {rows, cols}, + nanobind::handle()) + .cast(); +}; } // namespace cudaq::details diff --git a/python/runtime/cudaq/operators/py_helpers.h b/python/runtime/cudaq/operators/py_helpers.h index e712281784f..026f6f9b2fe 100644 --- a/python/runtime/cudaq/operators/py_helpers.h +++ b/python/runtime/cudaq/operators/py_helpers.h @@ -12,8 +12,11 @@ namespace cudaq::details { cudaq::parameter_map kwargs_to_param_map(const nanobind::kwargs &kwargs); +/// Extracts parameter map from `kwargs`, also extracting an optional +/// "invert_order" boolean (defaults to false if not present). 
+cudaq::parameter_map kwargs_to_param_map(nanobind::kwargs &kwargs, + bool &invert_order); std::unordered_map kwargs_to_param_description(const nanobind::kwargs &kwargs); -nanobind::ndarray> -cmat_to_numpy(complex_matrix &cmat); +nanobind::object cmat_to_numpy(complex_matrix &cmat); } // namespace cudaq::details diff --git a/python/runtime/cudaq/operators/py_matrix.cpp b/python/runtime/cudaq/operators/py_matrix.cpp index 48d37891e7f..32aa5f87a8d 100644 --- a/python/runtime/cudaq/operators/py_matrix.cpp +++ b/python/runtime/cudaq/operators/py_matrix.cpp @@ -28,18 +28,32 @@ void bindComplexMatrix(nanobind::module_ &mod) { "matrix of complex elements.") .def( "__init__", - [](complex_matrix *self, - nanobind::ndarray, nanobind::ndim<2>, - nanobind::c_contig, nanobind::numpy> - arr) { - auto rows = arr.shape(0); - auto cols = arr.shape(1); - new (self) complex_matrix(rows, cols); - memcpy(self->get_data(complex_matrix::order::row_major), arr.data(), - sizeof(std::complex) * rows * cols); + [](complex_matrix *self, nanobind::object b) { + auto arr = nanobind::cast>(b); + if (arr.ndim() != 2) + throw std::runtime_error("ComplexMatrix requires a 2D array"); + if (arr.shape(0) == 0 || arr.shape(1) == 0) + throw std::runtime_error("Matrix dimensions must be non-zero."); + + new (self) complex_matrix(arr.shape(0), arr.shape(1)); + + // Stride-aware element-wise copy so both row-major (C) and + // column-major (Fortran) layouts are handled correctly. + // nanobind strides are counted in elements, not bytes. 
+ auto *dest = self->get_data(complex_matrix::order::row_major); + auto *src = static_cast *>(arr.data()); + auto stride0 = arr.stride(0); + auto stride1 = arr.stride(1); + for (size_t i = 0; i < arr.shape(0); ++i) + for (size_t j = 0; j < arr.shape(1); ++j) + dest[i * arr.shape(1) + j] = src[i * stride0 + j * stride1]; }, "Create a :class:`ComplexMatrix` from a buffer of data, such as a " "numpy.ndarray.") + .def( + "to_numpy", + [](complex_matrix &op) { return details::cmat_to_numpy(op); }, + "Convert to a NumPy array.") .def( "num_rows", [](complex_matrix &m) { return m.rows(); }, "Returns the number of rows in the matrix.") diff --git a/python/runtime/cudaq/operators/py_matrix_op.cpp b/python/runtime/cudaq/operators/py_matrix_op.cpp index 3883f86c9bd..071050ce0aa 100644 --- a/python/runtime/cudaq/operators/py_matrix_op.cpp +++ b/python/runtime/cudaq/operators/py_matrix_op.cpp @@ -7,11 +7,11 @@ ******************************************************************************/ #include -#include #include #include #include #include +#include #include #include #include @@ -109,11 +109,11 @@ void bindMatrixOperator(nanobind::module_ &mod) { .def( "__iter__", [](matrix_op &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -161,13 +161,15 @@ void bindMatrixOperator(nanobind::module_ &mod) { .def( "to_matrix", - [](const matrix_op &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const matrix_op &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = 
params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " @@ -178,20 +180,29 @@ void bindMatrixOperator(nanobind::module_ &mod) { .def( "to_matrix", - [](const matrix_op &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const matrix_op &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. 
" "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const matrix_op &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") // comparisons @@ -240,6 +251,7 @@ void bindMatrixOperator(nanobind::module_ &mod) { .def(nanobind::self -= matrix_op_term(), nanobind::is_operator()) .def(nanobind::self *= nanobind::self, nanobind::is_operator()) .def(nanobind::self += nanobind::self, nanobind::is_operator()) +// see issue https://github.com/pybind/pybind11/issues/1893 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wself-assign-overloaded" @@ -315,17 +327,21 @@ void bindMatrixOperator(nanobind::module_ &mod) { .def("dump", &matrix_op::dump, "Prints the string representation of the operator to the standard " "output.") - .def("trim", &matrix_op::trim, nanobind::arg("tol") = 0.0, - nanobind::arg("parameters") = parameter_map(), - "Removes all terms from the sum for which the absolute value of the " - "coefficient is below " - "the given tolerance.") .def( "trim", - [](matrix_op &self, double tol, const nanobind::kwargs &kwargs) { + [](matrix_op &self, double tol, std::optional params) { + return self.trim(tol, params.value_or(parameter_map())); + }, + nanobind::arg("tol") = 0.0, + nanobind::arg("parameters").none() = nanobind::none(), + "Removes all terms from the sum for which the absolute value of the " + "coefficient is below " + "the given tolerance.") + .def( + "trim", + [](matrix_op &self, double tol, nanobind::kwargs kwargs) { return self.trim(tol, details::kwargs_to_param_map(kwargs)); }, - nanobind::arg("tol") = 0.0, nanobind::arg("kwargs"), "Removes all terms from the sum for which the absolute value of the " "coefficient is 
below " "the given tolerance.") @@ -350,11 +366,11 @@ void bindMatrixOperator(nanobind::module_ &mod) { .def( "__iter__", [](matrix_op_term &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -434,20 +450,27 @@ void bindMatrixOperator(nanobind::module_ &mod) { // evaluations - .def("evaluate_coefficient", &matrix_op_term::evaluate_coefficient, - nanobind::arg("parameters") = parameter_map(), - "Returns the evaluated coefficient of the product operator. The " - "parameters is a map of parameter names to their concrete, complex " - "values.") + .def( + "evaluate_coefficient", + [](const matrix_op_term &self, std::optional params) { + return self.evaluate_coefficient(params.value_or(parameter_map())); + }, + nanobind::arg("parameters").none() = nanobind::none(), + "Returns the evaluated coefficient of the product operator. 
The " + "parameters is a map of parameter names to their concrete, complex " + "values.") .def( "to_matrix", - [](const matrix_op_term &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const matrix_op_term &self, + std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " @@ -457,20 +480,29 @@ void bindMatrixOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_matrix", - [](const matrix_op_term &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const matrix_op_term &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. 
" "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const matrix_op_term &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") // comparisons @@ -551,8 +583,8 @@ void bindMatrixOperator(nanobind::module_ &mod) { .def("is_identity", &matrix_op_term::is_identity, "Checks if all operators in the product are the identity. " - "Note that this function returns true regardless of the value of " - "the coefficient.") + "Note: this function returns true regardless of the value of the " + "coefficient.") .def( "__str__", [](const matrix_op_term &self) { return self.to_string(); }, diff --git a/python/runtime/cudaq/operators/py_spin_op.cpp b/python/runtime/cudaq/operators/py_spin_op.cpp index e901dcac0cd..894b35f989a 100644 --- a/python/runtime/cudaq/operators/py_spin_op.cpp +++ b/python/runtime/cudaq/operators/py_spin_op.cpp @@ -7,12 +7,11 @@ ******************************************************************************/ #include -#include #include #include #include -#include #include +#include #include #include #include @@ -41,7 +40,7 @@ spin_op fromOpenFermionQubitOperator(nanobind::object &op) { for (auto term : terms) { auto termTuple = nanobind::cast(term); auto localTerm = spin_op::identity(); - for (auto element : termTuple) { + for (nanobind::handle element : termTuple) { auto casted = nanobind::cast>(element); localTerm *= creatorMap[casted.second](casted.first); @@ -136,11 +135,11 @@ void bindSpinOperator(nanobind::module_ &mod) { .def( "__iter__", [](spin_op &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + 
items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -237,13 +236,15 @@ void bindSpinOperator(nanobind::module_ &mod) { .def( "to_matrix", - [](const spin_op &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const spin_op &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " @@ -253,28 +254,39 @@ void bindSpinOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_matrix", - [](const spin_op &self, dimension_map &dimensions, bool invert_order, - const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const spin_op &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." 
"The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. " "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const spin_op &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") .def( "to_sparse_matrix", - [](const spin_op &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - return self.to_sparse_matrix(dimensions, params, invert_order); + [](const spin_op &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + return self.to_sparse_matrix(dims, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Return the sparse matrix representation of the operator. 
This " "representation is a " @@ -288,13 +300,12 @@ void bindSpinOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_sparse_matrix", - [](const spin_op &self, dimension_map &dimensions, bool invert_order, - const nanobind::kwargs &kwargs) { - return self.to_sparse_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const spin_op &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + return self.to_sparse_matrix(dimensions, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Return the sparse matrix representation of the operator. This " "representation is a " "`Tuple[list[complex], list[int], list[int]]`, encoding the " @@ -351,6 +362,7 @@ void bindSpinOperator(nanobind::module_ &mod) { .def(nanobind::self -= spin_op_term(), nanobind::is_operator()) .def(nanobind::self *= nanobind::self, nanobind::is_operator()) .def(nanobind::self += nanobind::self, nanobind::is_operator()) +// see issue https://github.com/pybind/pybind11/issues/1893 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wself-assign-overloaded" @@ -443,18 +455,22 @@ void bindSpinOperator(nanobind::module_ &mod) { auto data = self.get_data_representation(); return json.attr("dumps")(data); }, - "Convert spin_op to a JSON string, e.g., '[d1, d2, d3, ...]'.") - .def("trim", &spin_op::trim, nanobind::arg("tol") = 0.0, - nanobind::arg("parameters") = parameter_map(), - "Removes all terms from the sum for which the absolute value of the " - "coefficient is below " - "the given tolerance.") + "Convert spin_op to JSON string: '[d1, d2, d3, ...]'") + .def( + "trim", + [](spin_op &self, double tol, std::optional params) { + return self.trim(tol, params.value_or(parameter_map())); + }, + nanobind::arg("tol") = 0.0, + 
nanobind::arg("parameters").none() = nanobind::none(), + "Removes all terms from the sum for which the absolute value of the " + "coefficient is below " + "the given tolerance.") .def( "trim", - [](spin_op &self, double tol, const nanobind::kwargs &kwargs) { + [](spin_op &self, double tol, nanobind::kwargs kwargs) { return self.trim(tol, details::kwargs_to_param_map(kwargs)); }, - nanobind::arg("tol") = 0.0, nanobind::arg("kwargs"), "Removes all terms from the sum for which the absolute value of the " "coefficient is below " "the given tolerance.") @@ -626,11 +642,11 @@ void bindSpinOperator(nanobind::module_ &mod) { .def( "__iter__", [](spin_op_term &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the operator.") // properties @@ -749,20 +765,26 @@ void bindSpinOperator(nanobind::module_ &mod) { // evaluations - .def("evaluate_coefficient", &spin_op_term::evaluate_coefficient, - nanobind::arg("parameters") = parameter_map(), - "Returns the evaluated coefficient of the product operator. The " - "parameters is a map of parameter names to their concrete, complex " - "values.") + .def( + "evaluate_coefficient", + [](const spin_op_term &self, std::optional params) { + return self.evaluate_coefficient(params.value_or(parameter_map())); + }, + nanobind::arg("parameters").none() = nanobind::none(), + "Returns the evaluated coefficient of the product operator. 
The " + "parameters is a map of parameter names to their concrete, complex " + "values.") .def( "to_matrix", - [](const spin_op_term &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - auto cmat = self.to_matrix(dimensions, params, invert_order); + [](const spin_op_term &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + auto cmat = self.to_matrix(dims, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " @@ -772,28 +794,39 @@ void bindSpinOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_matrix", - [](const spin_op_term &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - auto cmat = self.to_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const spin_op_term &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimensions, pm, invert_order); return details::cmat_to_numpy(cmat); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Returns the matrix representation of the operator." "The matrix is ordered according to the convention (endianness) " "used in CUDA-Q, and the ordering returned by `degrees`. This order " "can be inverted by setting the optional `invert_order` argument to " "`True`. 
" "See also the documentation for `degrees` for more detail.") + .def( + "to_matrix", + [](const spin_op_term &self, nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + auto cmat = self.to_matrix(dimension_map(), pm, invert_order); + return details::cmat_to_numpy(cmat); + }, + "Returns the matrix representation of the operator, passing " + "parameters as keyword arguments.") .def( "to_sparse_matrix", - [](const spin_op_term &self, dimension_map &dimensions, - const parameter_map ¶ms, bool invert_order) { - return self.to_sparse_matrix(dimensions, params, invert_order); + [](const spin_op_term &self, std::optional dimensions, + std::optional params, bool invert_order) { + dimension_map dims = dimensions.value_or(dimension_map()); + parameter_map pm = params.value_or(parameter_map()); + return self.to_sparse_matrix(dims, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("parameters") = parameter_map(), + nanobind::arg("dimensions").none() = nanobind::none(), + nanobind::arg("parameters").none() = nanobind::none(), nanobind::arg("invert_order") = false, "Return the sparse matrix representation of the operator. 
This " "representation is a " @@ -807,13 +840,12 @@ void bindSpinOperator(nanobind::module_ &mod) { "See also the documentation for `degrees` for more detail.") .def( "to_sparse_matrix", - [](const spin_op_term &self, dimension_map &dimensions, - bool invert_order, const nanobind::kwargs &kwargs) { - return self.to_sparse_matrix( - dimensions, details::kwargs_to_param_map(kwargs), invert_order); + [](const spin_op_term &self, dimension_map dimensions, + nanobind::kwargs kwargs) { + bool invert_order; + auto pm = details::kwargs_to_param_map(kwargs, invert_order); + return self.to_sparse_matrix(dimensions, pm, invert_order); }, - nanobind::arg("dimensions") = dimension_map(), - nanobind::arg("invert_order") = false, nanobind::arg("kwargs"), "Return the sparse matrix representation of the operator. This " "representation is a " "`Tuple[list[complex], list[int], list[int]]`, encoding the " @@ -909,8 +941,8 @@ void bindSpinOperator(nanobind::module_ &mod) { .def("is_identity", &spin_op_term::is_identity, "Checks if all operators in the product are the identity. 
" - "Note that this function returns true regardless of the value of " - "the coefficient.") + "Note: this function returns true regardless of the value of the " + "coefficient.") .def( "__str__", [](const spin_op_term &self) { return self.to_string(); }, "Returns the string representation of the operator.") @@ -932,7 +964,7 @@ void bindSpinOperator(nanobind::module_ &mod) { auto data = spin_op(self).get_data_representation(); return json.attr("dumps")(data); }, - "Convert spin_op to a JSON string, e.g., '[d1, d2, d3, ...]'.") + "Convert spin_op to JSON string: '[d1, d2, d3, ...]'") // only exists for spin operators .def( "get_pauli_word", diff --git a/python/runtime/cudaq/operators/py_super_op.cpp b/python/runtime/cudaq/operators/py_super_op.cpp index 2c18dfbc820..89d3197dac4 100644 --- a/python/runtime/cudaq/operators/py_super_op.cpp +++ b/python/runtime/cudaq/operators/py_super_op.cpp @@ -7,7 +7,6 @@ ******************************************************************************/ #include -#include #include #include #include @@ -79,11 +78,11 @@ void bindSuperOperatorWrapper(nanobind::module_ &mod) { .def( "__iter__", [](super_op &self) { - return nanobind::make_iterator(nanobind::type(), - "iterator", self.begin(), - self.end()); + nanobind::list items; + for (auto it = self.begin(); it != self.end(); ++it) + items.append(nanobind::cast(*it)); + return items.attr("__iter__")(); }, - nanobind::keep_alive<0, 1>(), "Loop through each term of the super-operator.") .def(nanobind::self += nanobind::self, nanobind::is_operator()); } diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index e78a1e34d57..464dd5ced97 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -26,14 +26,13 @@ #include "runtime/cudaq/algorithms/py_utils.h" #include "runtime/cudaq/platform/PythonSignalCheck.h" #include "utils/LinkedLibraryHolder.h" 
-#include "utils/NanobindAdaptors.h" #include "utils/OpaqueArguments.h" #include "utils/PyTypes.h" -#include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Error.h" -#include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/Host.h" +#include "llvm/TargetParser/SubtargetFeature.h" #include "mlir/CAPI/ExecutionEngine.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/ExecutionEngine/OptUtils.h" @@ -55,9 +54,6 @@ #include using namespace mlir; -using namespace cudaq_internal::compiler; -using cudaq::JitEngine; -using cudaq::PackingStyle; static std::function getTransportLayer = []() -> std::string { throw std::runtime_error("binding for kernel launch is incomplete"); @@ -89,7 +85,7 @@ static std::unique_ptr cudaqStateStorage = static std::string createDataLayout() { // Setup the machine properties from the current architecture. - auto targetTriple = llvm::sys::getDefaultTargetTriple(); + llvm::Triple targetTriple(llvm::sys::getDefaultTargetTriple()); std::string errorMessage; const auto *target = llvm::TargetRegistry::lookupTarget(targetTriple, errorMessage); @@ -98,11 +94,9 @@ static std::string createDataLayout() { std::string cpu(llvm::sys::getHostCPUName()); llvm::SubtargetFeatures features; - llvm::StringMap hostFeatures; - - if (llvm::sys::getHostCPUFeatures(hostFeatures)) - for (auto &f : hostFeatures) - features.AddFeature(f.first(), f.second); + auto hostFeatures = llvm::sys::getHostCPUFeatures(); + for (auto &f : hostFeatures) + features.AddFeature(f.first(), f.second); std::unique_ptr machine(target->createTargetMachine( targetTriple, cpu, features.getString(), {}, {})); @@ -182,7 +176,7 @@ nanobind::args cudaq::simplifiedValidateInputArguments(nanobind::args &args) { return processed; } -template +template void cudaq::handleStructMemberVariable(void *data, std::size_t offset, mlir::Type memberType, nanobind::object value) { @@ -214,7 +208,7 @@ void 
cudaq::handleStructMemberVariable(void *data, std::size_t offset, // synthesis path: span {ptr, size_t} // argsCreator path: std::vector {ptr, ptr, ptr} constexpr std::size_t copySize = - sizeof(std::conditional_t