diff --git a/openxla/patches/20240901-001-Various-macOS-QOL-enchancements.patch b/openxla/patches/20240901-001-Various-macOS-QOL-enchancements.patch index 3c5603e..b1f09b4 100644 --- a/openxla/patches/20240901-001-Various-macOS-QOL-enchancements.patch +++ b/openxla/patches/20240901-001-Various-macOS-QOL-enchancements.patch @@ -14,40 +14,9 @@ PR: https://github.com/openxla/xla/pull/16696 Co-authored-by: Steeve Morin --- - tensorflow.bazelrc | 5 ++--- - xla/pjrt/c/BUILD | 18 ++++++++++-------- - 2 files changed, 12 insertions(+), 11 deletions(-) + xla/pjrt/c/BUILD | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) -diff --git a/tensorflow.bazelrc b/tensorflow.bazelrc -index f2ad3f6169..032cca5657 100644 ---- a/tensorflow.bazelrc -+++ b/tensorflow.bazelrc -@@ -688,7 +688,6 @@ test:release_arm64_linux --flaky_test_attempts=3 - build:release_cpu_macos --config=avx_linux - - # Base build configs for macOS --build:release_macos_base --action_env DEVELOPER_DIR=/Applications/Xcode.app/Contents/Developer - build:release_macos_base --define=no_nccl_support=true --output_filter=^$ - - # Ensure release_base is set on mac -@@ -701,7 +700,7 @@ build:release_macos_x86 --config=avx_linux - build:release_macos_x86 --cpu=darwin - # Target Catalina as the minimum compatible OS version - build:release_macos_x86 --macos_minimum_os=10.15 --build:release_macos_x86 --action_env MACOSX_DEPLOYMENT_TARGET=10.15 -+build:release_macos_x86 --macos_sdk_version=10.15 - - # Build configs for macOS Arm64 - build:release_macos_arm64 --config=release_macos_base -@@ -709,7 +708,7 @@ build:release_macos_arm64 --cpu=darwin_arm64 - build:release_macos_arm64 --define=tensorflow_mkldnn_contraction_kernel=0 - # Target Moneterey as the minimum compatible OS version - build:release_macos_arm64 --macos_minimum_os=12.0 --build:release_macos_arm64 --action_env MACOSX_DEPLOYMENT_TARGET=12.0 -+build:release_macos_arm64 --macos_sdk_version=12.0 - - # Base test configs for macOS - test:release_macos_base --verbose_failures=true --local_test_jobs=HOST_CPUS diff --git a/xla/pjrt/c/BUILD b/xla/pjrt/c/BUILD index a0485b6a43..6f67ee6b78 100644 --- a/xla/pjrt/c/BUILD @@ -101,4 +70,3 @@ index a0485b6a43..6f67ee6b78 100644 "//xla/stream_executor:cuda_platform", -- 2.39.5 (Apple Git-154) - diff --git a/openxla/patches/20250225-001-Patch-cudnn-sdpa.patch b/openxla/patches/20250225-001-Patch-cudnn-sdpa.patch new file mode 100644 index 0000000..27fa791 --- /dev/null +++ b/openxla/patches/20250225-001-Patch-cudnn-sdpa.patch @@ -0,0 +1,152 @@ +From 3039cbe576b79c489920c165e12c1fdc08a5321a Mon Sep 17 00:00:00 2001 +From: Hugo Mano +Date: Tue, 25 Feb 2025 10:44:44 +0000 +Subject: [PATCH] pagged sdpa + +--- + xla/service/gpu/backend_configs.proto | 2 + + .../transforms/cudnn_custom_call_compiler.cc | 23 ++++++++++- + xla/stream_executor/cuda/cuda_dnn.cc | 39 +++++++++++++++++++ + xla/stream_executor/cuda/cuda_dnn.h | 5 +++ + 4 files changed, 67 insertions(+), 2 deletions(-) + +diff --git a/xla/service/gpu/backend_configs.proto b/xla/service/gpu/backend_configs.proto +index a7ff5bfba2..6ddbb5424e 100644 +--- a/xla/service/gpu/backend_configs.proto ++++ b/xla/service/gpu/backend_configs.proto +@@ -283,6 +283,8 @@ message CudnnfMHABackendConfig { + // Only used with packed layout + // ignored if the valued <= 1 + int32 max_seg_per_batch = 25; ++ ++ optional int32 max_sequence_length_kv = 26; + } + + // Backend config for a general custom call instruction, e.g. XLA FFI. +diff --git a/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc b/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc +index 4c15388a0a..2a15f3304c 100644 +--- a/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc ++++ b/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc +@@ -139,6 +139,22 @@ absl::StatusOr BuildGraphForCustomCallToForwardFMHA( + TF_ASSIGN_OR_RETURN(bias, TensorDescriptorFor(bias_hlo.shape())); + } + ++ std::optional sequence_length_q; ++ std::optional sequence_length_kv; ++ std::optional page_table_k; ++ std::optional page_table_v; ++ ++ if (custom_call->operand_count() == 7) { ++ TF_ASSIGN_OR_RETURN(sequence_length_q, ++ TensorDescriptorFor(custom_call->operand(4)->shape())); ++ TF_ASSIGN_OR_RETURN(sequence_length_kv, ++ TensorDescriptorFor(custom_call->operand(5)->shape())); ++ TF_ASSIGN_OR_RETURN(page_table_k, ++ TensorDescriptorFor(custom_call->operand(6)->shape())); ++ TF_ASSIGN_OR_RETURN(page_table_v, ++ TensorDescriptorFor(custom_call->operand(7)->shape())); ++ } ++ + const double dropout_rate = config.dropout_rate(); + + TF_ASSIGN_OR_RETURN(CudnnfMHAMaskKind cudnn_mask_type, +@@ -148,13 +164,16 @@ absl::StatusOr BuildGraphForCustomCallToForwardFMHA( + + const int sliding_window_length = config.sliding_window_length(); + const int max_seg_per_batch = config.max_seg_per_batch(); ++ std::optional max_sequence_length_kv = config.max_sequence_length_kv(); + TF_ASSIGN_OR_RETURN( + se::gpu::CudnnGraph graph, + se::gpu::GetCudnnFlashAttentionOperationGraph( +- dnn_support, lhs_bmm1, rhs_bmm1, rhs_bmm2, output, bias, activation, ++ dnn_support, lhs_bmm1, rhs_bmm1, rhs_bmm2, output, bias, sequence_length_q, ++ sequence_length_kv, activation, + static_cast(config.fmha_scale()), dropout_rate > 0.0, + dropout_rate, dnn_mask_type, sliding_window_length, +- max_seg_per_batch)); ++ page_table_k, page_table_v, ++ max_sequence_length_kv, max_seg_per_batch)); + return graph; + } + +diff --git a/xla/stream_executor/cuda/cuda_dnn.cc b/xla/stream_executor/cuda/cuda_dnn.cc +index 808870837f..922477beb8 100644 +--- a/xla/stream_executor/cuda/cuda_dnn.cc ++++ b/xla/stream_executor/cuda/cuda_dnn.cc +@@ -4981,9 +4981,14 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + const dnn::MatmulTensorDescriptor& v_descriptor, + const dnn::TensorDescriptor& o_descriptor, + const std::optional bias_descriptor, ++ const std::optional sequence_length_q, ++ const std::optional sequence_length_kv, + const std::optional stats_descriptor, double scale, + const bool use_dropout, const std::optional dropout_rate, + const dnn::FMHAMaskKind mask_type, const int sliding_window_length, ++ const std::optional page_table_k, ++ const std::optional page_table_v, ++ const std::optional max_sequence_length_kv, + const int max_seg_per_batch) { + using cudnn_frontend::graph::Tensor_attributes; + +@@ -5139,6 +5144,40 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + if (sliding_window_length > 0) { + sdpa_options.set_sliding_window_length(sliding_window_length); + } ++ ++ if (sequence_length_q && sequence_length_kv && page_table_k && page_table_v && max_sequence_length_kv) { ++ auto seq_q = graph.tensor(Tensor_attributes() ++ .set_name("seq_q") ++ .set_uid(next_uid()) ++ .set_dim(sequence_length_q->dimensions()) ++ .set_stride(sequence_length_q->GetLogicalStrides()) ++ .set_data_type(cudnn_frontend::DataType_t::INT32)); ++ auto seq_kv = graph.tensor(Tensor_attributes() ++ .set_name("seq_kv") ++ .set_uid(next_uid()) ++ .set_dim(sequence_length_kv->dimensions()) ++ .set_stride(sequence_length_kv->GetLogicalStrides()) ++ .set_data_type(cudnn_frontend::DataType_t::INT32)); ++ sdpa_options.set_padding_mask(true).set_seq_len_q(seq_q).set_seq_len_kv(seq_kv); ++ ++ auto page_table_k_ = graph.tensor(Tensor_attributes() ++ .set_name("page_table_k") ++ .set_uid(next_uid()) ++ .set_dim(page_table_k->dimensions()) ++ .set_stride(page_table_k->GetLogicalStrides()) ++ .set_data_type(cudnn_frontend::DataType_t::INT32)); ++ auto page_table_v_ = graph.tensor(Tensor_attributes() ++ .set_name("page_table_v") ++ .set_uid(next_uid()) ++ .set_dim(page_table_v->dimensions()) ++ .set_stride(page_table_v->GetLogicalStrides()) ++ .set_data_type(cudnn_frontend::DataType_t::INT32)); ++ ++ sdpa_options.set_paged_attention_k_table(page_table_k_); ++ sdpa_options.set_paged_attention_v_table(page_table_v_); ++ sdpa_options.set_paged_attention_max_seq_len_kv(max_sequence_length_kv.value()); ++ } ++ + // Add SDPA to the graph. + auto [o_tensor, stats_tensor] = + graph.sdpa(q_tensor, k_tensor, v_tensor, sdpa_options); +diff --git a/xla/stream_executor/cuda/cuda_dnn.h b/xla/stream_executor/cuda/cuda_dnn.h +index 946e419311..9146eaa785 100644 +--- a/xla/stream_executor/cuda/cuda_dnn.h ++++ b/xla/stream_executor/cuda/cuda_dnn.h +@@ -714,9 +714,14 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + const dnn::MatmulTensorDescriptor& v_descriptor, + const dnn::TensorDescriptor& o_descriptor, + const std::optional bias_descriptor, ++ const std::optional sequence_length_q, ++ const std::optional sequence_length_kv, + const std::optional stats_descriptor, double scale, + const bool use_dropout, const std::optional dropout_rate, + const dnn::FMHAMaskKind mask_type, const int sliding_window_length, ++ const std::optional page_table_k, ++ const std::optional page_table_v, ++ const std::optional max_sequence_length_kv, + const int max_seg_per_batch); + + absl::StatusOr GetCudnnFlashAttentionF8OperationGraph( +-- +2.34.1 diff --git a/openxla/patches/20250225-002-Patch-cudnn-sdpa.patch b/openxla/patches/20250225-002-Patch-cudnn-sdpa.patch new file mode 100644 index 0000000..6ca7360 --- /dev/null +++ b/openxla/patches/20250225-002-Patch-cudnn-sdpa.patch @@ -0,0 +1,52 @@ +From c4c568754c53674072317f322d511fcba93d7871 Mon Sep 17 00:00:00 2001 +From: Hugo Mano +Date: Tue, 25 Feb 2025 15:54:15 +0000 +Subject: [PATCH] patche + +--- + xla/service/gpu/transforms/cudnn_custom_call_compiler.cc | 2 +- + xla/stream_executor/cuda/cuda_dnn.cc | 9 ++++++++- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc b/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc +index 2a15f3304c..1604494003 100644 +--- a/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc ++++ b/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc +@@ -144,7 +144,7 @@ absl::StatusOr BuildGraphForCustomCallToForwardFMHA( + std::optional page_table_k; + std::optional page_table_v; + +- if (custom_call->operand_count() == 7) { ++ if (custom_call->operand_count() == 8) { + TF_ASSIGN_OR_RETURN(sequence_length_q, + TensorDescriptorFor(custom_call->operand(4)->shape())); + TF_ASSIGN_OR_RETURN(sequence_length_kv, +diff --git a/xla/stream_executor/cuda/cuda_dnn.cc b/xla/stream_executor/cuda/cuda_dnn.cc +index 922477beb8..1038d4b43f 100644 +--- a/xla/stream_executor/cuda/cuda_dnn.cc ++++ b/xla/stream_executor/cuda/cuda_dnn.cc +@@ -5025,6 +5025,11 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + std::vector v_dims = + v_descriptor.GetCudnnCompatibleDimensions(false); + ++ ++ VLOG(4) << "\n GetCudnnCompatibleDimensions: q_dims: " << absl::StrJoin(q_dims, ","); ++ VLOG(4) << "\n GetCudnnCompatibleDimensions: k_dims: " << absl::StrJoin(k_dims, ","); ++ VLOG(4) << "\n GetCudnnCompatibleDimensions: v_dims: " << absl::StrJoin(v_dims, ","); ++ + if (max_seg_per_batch > 1) { + FixDimsForRaggedOffset(q_dims, max_seg_per_batch); + FixDimsForRaggedOffset(k_dims, max_seg_per_batch); +@@ -5037,7 +5042,9 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + .set_dim(q_dims) + .set_stride(q_descriptor.GetCudnnCompatibleStrides(true)) + .set_uid(next_uid())); +- ++ VLOG(4) << "\n q_strides: " << absl::StrJoin(q_descriptor.GetCudnnCompatibleStrides(true), ","); ++ VLOG(4) << "\n k_strides: " << absl::StrJoin(k_descriptor.GetCudnnCompatibleStrides(true), ","); ++ VLOG(4) << "\n v_strides: " << absl::StrJoin(v_descriptor.GetCudnnCompatibleStrides(false), ","); + std::shared_ptr k_tensor = + graph.tensor(Tensor_attributes() + .set_name("K") +-- +2.34.1 \ No newline at end of file diff --git a/openxla/patches/20250225-003-Patch-cudnn-sdpa.patch b/openxla/patches/20250225-003-Patch-cudnn-sdpa.patch new file mode 100644 index 0000000..0bd9609 --- /dev/null +++ b/openxla/patches/20250225-003-Patch-cudnn-sdpa.patch @@ -0,0 +1,25 @@ +From b81051b2d45f486652a72cf88c355b4db9c9cd6b Mon Sep 17 00:00:00 2001 +From: Hugo Mano +Date: Tue, 25 Feb 2025 16:50:40 +0000 +Subject: [PATCH] remote attn scale + +--- + xla/stream_executor/cuda/cuda_dnn.cc | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/xla/stream_executor/cuda/cuda_dnn.cc b/xla/stream_executor/cuda/cuda_dnn.cc +index 1038d4b43f..d87c8acc43 100644 +--- a/xla/stream_executor/cuda/cuda_dnn.cc ++++ b/xla/stream_executor/cuda/cuda_dnn.cc +@@ -5064,8 +5064,7 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + cudnn_frontend::graph::SDPA_attributes sdpa_options; + sdpa_options.set_name("flash_attention") + .set_is_inference(stats_descriptor == std::nullopt) +- .set_causal_mask(is_causal) +- .set_attn_scale(scale); ++ .set_causal_mask(is_causal); + + // Setting bias + if (bias_descriptor.has_value()) { +-- +2.34.1 \ No newline at end of file diff --git a/openxla/patches/20250225-004-Patch-cudnn-sdpa.patch b/openxla/patches/20250225-004-Patch-cudnn-sdpa.patch new file mode 100644 index 0000000..40cbabc --- /dev/null +++ b/openxla/patches/20250225-004-Patch-cudnn-sdpa.patch @@ -0,0 +1,94 @@ +diff --git a/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc b/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc +index 6199a84562..6a325e7433 100644 +--- a/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc ++++ b/xla/service/gpu/transforms/cudnn_custom_call_compiler.cc +@@ -133,26 +133,26 @@ absl::StatusOr BuildGraphForCustomCallToForwardFMHA( + } + + std::optional bias; +- if (kind == CudnnfMHAKind::kScaleBiasSoftmax || +- kind == CudnnfMHAKind::kScaleBiasSoftmaxDropout) { +- const HloInstruction &bias_hlo = *custom_call->operand(3); +- TF_ASSIGN_OR_RETURN(bias, TensorDescriptorFor(bias_hlo.shape())); +- } ++ //if (kind == CudnnfMHAKind::kScaleBiasSoftmax || ++ // kind == CudnnfMHAKind::kScaleBiasSoftmaxDropout) { ++ // const HloInstruction &bias_hlo = *custom_call->operand(3); ++ // TF_ASSIGN_OR_RETURN(bias, TensorDescriptorFor(bias_hlo.shape())); ++ //} + + std::optional sequence_length_q; + std::optional sequence_length_kv; + std::optional page_table_k; + std::optional page_table_v; + +- if (custom_call->operand_count() == 8) { ++ if (custom_call->operand_count() == 7) { + TF_ASSIGN_OR_RETURN(sequence_length_q, +- TensorDescriptorFor(custom_call->operand(4)->shape())); ++ TensorDescriptorFor(custom_call->operand(3)->shape())); + TF_ASSIGN_OR_RETURN(sequence_length_kv, +- TensorDescriptorFor(custom_call->operand(5)->shape())); ++ TensorDescriptorFor(custom_call->operand(4)->shape())); + TF_ASSIGN_OR_RETURN(page_table_k, +- TensorDescriptorFor(custom_call->operand(6)->shape())); ++ TensorDescriptorFor(custom_call->operand(5)->shape())); + TF_ASSIGN_OR_RETURN(page_table_v, +- TensorDescriptorFor(custom_call->operand(7)->shape())); ++ TensorDescriptorFor(custom_call->operand(6)->shape())); + } + + const double dropout_rate = config.dropout_rate(); +diff --git a/xla/stream_executor/cuda/cuda_dnn.cc b/xla/stream_executor/cuda/cuda_dnn.cc +index 2d6bd67570..0720df385d 100644 +--- a/xla/stream_executor/cuda/cuda_dnn.cc ++++ b/xla/stream_executor/cuda/cuda_dnn.cc +@@ -5079,27 +5079,27 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + // Setting actual seqlen + bool is_padding = mask_type == dnn::FMHAMaskKind::PADDING || + mask_type == dnn::FMHAMaskKind::PADDING_CAUSAL; +- if (is_padding || max_seg_per_batch > 1) { +- // Get batch size +- auto b = q_dims[0]; +- auto seq_q_tensor = +- graph.tensor(Tensor_attributes() +- .set_name("seq_q") +- .set_dim({b, 1, 1, 1}) +- .set_stride({1, 1, 1, 1}) +- .set_uid(next_uid()) +- .set_data_type(cudnn_frontend::DataType_t::INT32)); +- auto seq_kv_tensor = +- graph.tensor(Tensor_attributes() +- .set_name("seq_kv") +- .set_dim({b, 1, 1, 1}) +- .set_stride({1, 1, 1, 1}) +- .set_uid(next_uid()) +- .set_data_type(cudnn_frontend::DataType_t::INT32)); +- sdpa_options.set_padding_mask(true); +- sdpa_options.set_seq_len_q(seq_q_tensor); +- sdpa_options.set_seq_len_kv(seq_kv_tensor); +- } ++ //if (is_padding || max_seg_per_batch > 1) { ++ // // Get batch size ++ // auto b = q_dims[0]; ++ // auto seq_q_tensor = ++ // graph.tensor(Tensor_attributes() ++ // .set_name("seq_q") ++ // .set_dim({b, 1, 1, 1}) ++ // .set_stride({1, 1, 1, 1}) ++ // .set_uid(next_uid()) ++ // .set_data_type(cudnn_frontend::DataType_t::INT32)); ++ // auto seq_kv_tensor = ++ // graph.tensor(Tensor_attributes() ++ // .set_name("seq_kv") ++ // .set_dim({b, 1, 1, 1}) ++ // .set_stride({1, 1, 1, 1}) ++ // .set_uid(next_uid()) ++ // .set_data_type(cudnn_frontend::DataType_t::INT32)); ++ // sdpa_options.set_padding_mask(true); ++ // sdpa_options.set_seq_len_q(seq_q_tensor); ++ // sdpa_options.set_seq_len_kv(seq_kv_tensor); ++ //} + + std::shared_ptr offset_q; + if (max_seg_per_batch > 1) { diff --git a/openxla/patches/20250225-005-Patch-cudnn-sdpa.patch b/openxla/patches/20250225-005-Patch-cudnn-sdpa.patch new file mode 100644 index 0000000..02cf05d --- /dev/null +++ b/openxla/patches/20250225-005-Patch-cudnn-sdpa.patch @@ -0,0 +1,26 @@ +From b929c4905a202c56e107e3c44c79ba7c87c1d4b4 Mon Sep 17 00:00:00 2001 +From: Corentin Godeau +Date: Thu, 27 Feb 2025 10:45:31 +0100 +Subject: [PATCH] Re-enable attention scale + +--- + xla/stream_executor/cuda/cuda_dnn.cc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xla/stream_executor/cuda/cuda_dnn.cc b/xla/stream_executor/cuda/cuda_dnn.cc +index 4a0dd1970b..06a0c240bc 100644 +--- a/xla/stream_executor/cuda/cuda_dnn.cc ++++ b/xla/stream_executor/cuda/cuda_dnn.cc +@@ -5064,7 +5064,8 @@ absl::StatusOr GetCudnnFlashAttentionOperationGraph( + cudnn_frontend::graph::SDPA_attributes sdpa_options; + sdpa_options.set_name("flash_attention") + .set_is_inference(stats_descriptor == std::nullopt) +- .set_causal_mask(is_causal); ++ .set_causal_mask(is_causal) ++ .set_attn_scale(scale); + + // Setting bias + if (bias_descriptor.has_value()) { +-- +2.39.3 (Apple Git-145) + diff --git a/openxla/patches/20250225-006-Patch-cudnn-sdpa.patch b/openxla/patches/20250225-006-Patch-cudnn-sdpa.patch new file mode 100644 index 0000000..cff9392 --- /dev/null +++ b/openxla/patches/20250225-006-Patch-cudnn-sdpa.patch @@ -0,0 +1,25 @@ +From 3b31141ade884a025bca78df53faca708d44f765 Mon Sep 17 00:00:00 2001 +From: Corentin Godeau +Date: Thu, 27 Feb 2025 13:37:13 +0100 +Subject: [PATCH] enable dynamic slice fusion for FMHA custom call + +--- + xla/service/gpu/transforms/dynamic_slice_fusion_rewriter.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xla/service/gpu/transforms/dynamic_slice_fusion_rewriter.cc b/xla/service/gpu/transforms/dynamic_slice_fusion_rewriter.cc +index 80d1a9a2d8..5d37e13f42 100644 +--- a/xla/service/gpu/transforms/dynamic_slice_fusion_rewriter.cc ++++ b/xla/service/gpu/transforms/dynamic_slice_fusion_rewriter.cc +@@ -535,7 +535,7 @@ absl::StatusOr DynamicSliceFusionRewriter::Run( + for (HloInstruction* instr : computation->instructions()) { + if ((HloPredicateIsOp(instr) && + instr->shape().IsArray()) || +- IsLegacyCublasMatmul(*instr) || IsCustomCall(instr, platform_name_)) { ++ IsLegacyCublasMatmul(*instr) || IsCustomCallTofMHA(*instr) || IsCustomCall(instr, platform_name_)) { + UseDefDataflowPaths sliced_operand_paths = + GetSlicedOperandPaths(instr, call_graph.get()); + bool has_sliced_operand_paths = sliced_operand_paths.size() > 1; +-- +2.39.3 (Apple Git-145) +