Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions onnxruntime/core/providers/webgpu/compute_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,6 @@ const webgpu::BufferManager& ComputeContextBase::BufferManagerAccessor::Get(cons
return context.ep_.BufferManager();
}

// Allocates a GPU-resident Tensor via the given allocator and ensures the
// underlying WGPUBuffer is in the Unmapped state so it is ready for use in
// GPU commands.
// @param alloc      Allocator used to back the tensor; must be non-null.
// @param data_type  Element type of the tensor to create.
// @param shape      Shape of the tensor to create.
// @param tensor     Output parameter; receives the newly created tensor.
// @return Status::OK() on success; an error Status if the allocator is null,
//         allocation fails, or the buffer handle cannot be obtained.
Status ComputeContextBase::CreateUnmappedGPUTensor(AllocatorPtr alloc, MLDataType data_type, const TensorShape& shape, std::unique_ptr<Tensor>& tensor) const {
ORT_RETURN_IF_NOT(alloc != nullptr, "Allocator must not be null when creating GPU tensor.");

tensor = std::make_unique<Tensor>(data_type, shape, alloc);
ORT_RETURN_IF_NOT(tensor != nullptr, "Failed to allocate GPU tensor.");

// For this allocator the tensor's raw data pointer holds the WGPUBuffer
// handle itself (see the reinterpret_cast below), not a host-memory address.
void* data = tensor->MutableDataRaw();
ORT_RETURN_IF_NOT(data != nullptr, "Failed to get GPU tensor buffer.");

auto buffer = reinterpret_cast<WGPUBuffer>(data);
// NOTE(review): presumably the allocator may hand out buffers created
// mapped-at-creation; a mapped buffer cannot be used in GPU operations,
// so unmap it unless it is already Unmapped.
if (wgpuBufferGetMapState(buffer) != WGPUBufferMapState_Unmapped) {
wgpuBufferUnmap(buffer);
}
return Status::OK();
}

ComputeContext::ComputeContext(WebGpuContext& webgpu_context,
const WebGpuExecutionProvider& ep,
const OpKernel& op_kernel,
Expand Down
3 changes: 0 additions & 3 deletions onnxruntime/core/providers/webgpu/compute_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ class ComputeContextBase {
return op_kernel_.Node().Name();
}

Status CreateUnmappedGPUTensor(AllocatorPtr alloc, MLDataType data_type, const TensorShape& shape,
std::unique_ptr<Tensor>& tensor) const;

//
// Get the operator type.
//
Expand Down
8 changes: 3 additions & 5 deletions onnxruntime/core/providers/webgpu/nn/conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -354,11 +354,9 @@
}
TensorShape transposed_kernel_shape(transposed_kernel_shape_vector);

ORT_ENFORCE(alloc != nullptr, "Allocator must be provided for WebGPU pre-pack.");

// Create the transposed kernel tensor using the WebGPU allocator.
// Both input tensor and output tensor are GPU tensors, ready for GPU operations.
ORT_RETURN_IF_ERROR(context.CreateUnmappedGPUTensor(alloc, tensor.DataType(), transposed_kernel_shape, transposed_kernel_));
// Create the transposed kernel tensor using the prepack allocator.
// This allocator creates GPU buffers without mapping, suitable for GPU-based operations.
transposed_kernel_ = std::make_unique<Tensor>(tensor.DataType(), transposed_kernel_shape, alloc);

Check warning on line 359 in onnxruntime/core/providers/webgpu/nn/conv.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for make_unique<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/webgpu/nn/conv.cc:359: Add #include <memory> for make_unique<> [build/include_what_you_use] [4]

// Perform GPU-based transpose directly from the input GPU tensor
ORT_RETURN_IF_ERROR(Transpose::DoTranspose(context, perm, tensor, *transposed_kernel_));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,8 @@ WebGpuExecutionProvider::WebGpuExecutionProvider(int context_id,
context_{context},
preferred_data_layout_{config.data_layout},
force_cpu_node_names_{std::move(config.force_cpu_node_names)},
enable_graph_capture_{config.enable_graph_capture} {
enable_graph_capture_{config.enable_graph_capture},
prepack_allocator_{std::make_shared<webgpu::GpuBufferAllocator>(context_.InitializerBufferManager(), false)} {
// If graph capture is enabled, create a dedicated buffer manager for graph mode
if (enable_graph_capture_) {
// Create buffer manager for graph capture mode with appropriate cache modes
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/webgpu/webgpu_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class WebGpuExecutionProvider : public IExecutionProvider {
bool IsGraphCaptured(int graph_annotation_id) const override;
Status ReplayGraph(int graph_annotation_id) override;
webgpu::BufferManager& BufferManager() const;
AllocatorPtr PrepackAllocator() const { return prepack_allocator_; }

private:
bool IsGraphCaptureAllowed() const;
Expand All @@ -105,6 +106,9 @@ class WebGpuExecutionProvider : public IExecutionProvider {

// Store captured commands directly in the EP instead of in WebGpuContext
std::vector<webgpu::CapturedCommandInfo> captured_commands_;

// Allocator for prepacked weights (uses buffers without mapping)
AllocatorPtr prepack_allocator_;
};

} // namespace onnxruntime
5 changes: 3 additions & 2 deletions onnxruntime/core/providers/webgpu/webgpu_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Status WebGpuKernel::Compute(OpKernelContext* p_op_kernel_context) const {
return s;
}

Status WebGpuKernel::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
Status WebGpuKernel::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr /*alloc*/,
/*out*/ bool& is_packed, /*out*/ PrePackedWeights* /* prepacked_weights */) {
ComputeContextBase context{webgpu_context_, ep_, *this};

Expand All @@ -45,8 +45,9 @@ Status WebGpuKernel::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr a
// Currently, ORT does not allow using prepacked weights in non-CPU EPs.
// So we do not pass prepacked_weights to PrePackInternal.
// Kernel implementation that supports prepacking should manage its own storage.
// Use the EP's prepack allocator which creates unmapped GPU buffers.

Status s = PrePackInternal(context, tensor, input_idx, alloc, is_packed);
Status s = PrePackInternal(context, tensor, input_idx, ep_.PrepackAllocator(), is_packed);

if (webgpu_context_.ValidationMode() >= ValidationMode::Full) {
ORT_RETURN_IF_ERROR(webgpu_context_.PopErrorScope());
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/webgpu/webgpu_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class WebGpuKernel : public OpKernel {
// @param context The WebGPU compute context base providing access to the execution environment.
// @param tensor The constant tensor to potentially pre-process.
// @param input_idx The index of this input in the kernel's input list.
// @param alloc The allocator to use for any new tensor allocations.
// @param alloc The allocator to use for any new tensor allocations (prepack allocator).
// @param is_packed Output parameter. Set to true if the tensor was pre-packed/processed,
// false otherwise. The default implementation sets this to false.
//
Expand Down
Loading