Skip to content

Commit aeac757

Browse files
authored
[webgpu] Fix the incorrect shapes when profiling (microsoft#26928)
This pull request refactors how input and output tensor shape information is stored and accessed in the WebGPU context. Instead of keeping references to the full input and output tensors, only their shapes are now stored, which avoids accessing already-released tensors during profiling. Before: `"inputs[0] = {1,1,768} inputs[1] = {200064,96,1} inputs[2] = {} outputs[0] = {} "`. After: `"inputs[0] = {1,1,768} inputs[1] = {200064,96,1} inputs[2] = {19206144} outputs[0] = {1,1,200064} "`.
1 parent cde7ed4 commit aeac757

2 files changed

Lines changed: 19 additions & 11 deletions

File tree

onnxruntime/core/providers/webgpu/webgpu_context.cc

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -628,17 +628,15 @@ void WebGpuContext::CollectProfilingData(profiling::Events& events) {
628628

629629
for (size_t i = 0; i < pending_kernels.size(); i++) {
630630
const PendingKernelInfo& pending_kernel_info = pending_kernels[i];
631-
const auto& inputs = pending_kernel_info.inputs;
632-
const auto& outputs = pending_kernel_info.outputs;
631+
const auto& input_shapes = pending_kernel_info.input_shapes;
632+
const auto& output_shapes = pending_kernel_info.output_shapes;
633633

634634
SS(shapes, 128);
635-
for (size_t s = 0; s < inputs.size(); s++) {
636-
const auto& input = inputs[s];
637-
shapes << "inputs[" << s << "] = " << input.override_shape.ToString() << " ";
635+
for (size_t s = 0; s < input_shapes.size(); s++) {
636+
shapes << "inputs[" << s << "] = " << input_shapes[s].ToString() << " ";
638637
}
639-
for (size_t s = 0; s < outputs.size(); s++) {
640-
const auto& output = outputs[s];
641-
shapes << "outputs[" << s << "] = " << output.override_shape.ToString() << " ";
638+
for (size_t s = 0; s < output_shapes.size(); s++) {
639+
shapes << "outputs[" << s << "] = " << output_shapes[s].ToString() << " ";
642640
}
643641

644642
if (gpu_timestamp_offset_ == 0) {

onnxruntime/core/providers/webgpu/webgpu_context.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,16 +268,26 @@ class WebGpuContext final {
268268
std::string_view cache_key,
269269
const std::vector<ProgramInput>& inputs,
270270
const std::vector<ProgramOutput>& outputs)
271-
: name{absl::StrJoin({kernel_name, kernel_type, program_name}, "&")}, cache_key{cache_key}, inputs{inputs}, outputs{outputs} {}
271+
: name{absl::StrJoin({kernel_name, kernel_type, program_name}, "&")}, cache_key{cache_key} {
272+
// Store shape information instead of tensor pointers to avoid accessing released tensors
273+
input_shapes.reserve(inputs.size());
274+
for (const auto& input : inputs) {
275+
input_shapes.emplace_back(input.use_override_shape ? input.override_shape : input.tensor->Shape());
276+
}
277+
output_shapes.reserve(outputs.size());
278+
for (const auto& output : outputs) {
279+
output_shapes.emplace_back(output.use_override_shape ? output.override_shape : output.tensor->Shape());
280+
}
281+
}
272282

273283
PendingKernelInfo(PendingKernelInfo&&) = default;
274284
PendingKernelInfo& operator=(PendingKernelInfo&&) = default;
275285
ORT_DISALLOW_COPY_AND_ASSIGNMENT(PendingKernelInfo);
276286

277287
std::string name;
278288
std::string cache_key;
279-
std::vector<ProgramInput> inputs;
280-
std::vector<ProgramOutput> outputs;
289+
std::vector<TensorShape> input_shapes;
290+
std::vector<TensorShape> output_shapes;
281291
};
282292

283293
struct PendingQueryInfo {

0 commit comments

Comments (0)