microsoft · fs-eire · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -147,6 +147,7 @@ option(onnxruntime_TVM_USE_LLVM "Build TVM with LLVM. Set customized path to llv
 option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algorithm. It is defined for TVM only")
 option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF)
 option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
+option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)
 
 # Options related to reducing the binary size produced by the build
 # XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
@@ -906,6 +907,11 @@ if (onnxruntime_USE_WEBNN)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBNN=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES webnn)
 endif()
+if (onnxruntime_USE_WEBGPU)
+  list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBGPU=1)
+  list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBGPU=1)
+  list(APPEND ONNXRUNTIME_PROVIDER_NAMES webgpu)
+endif()
 if (onnxruntime_USE_CANN)
     list(APPEND ORT_PROVIDER_FLAGS  -DUSE_CANN=1)
     list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CANN=1)

diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
@@ -633,6 +633,20 @@ if (onnxruntime_USE_COREML)
   FetchContent_Populate(coremltools)
 endif()
 
+if (onnxruntime_USE_WEBGPU)
+  FetchContent_Declare(
+    dawn
+    URL ${DEP_URL_dawn}
+    URL_HASH SHA1=${DEP_SHA1_dawn}
+  )
+  set(DAWN_FETCH_DEPENDENCIES ON)
+  set(DAWN_ENABLE_INSTALL ON)
+  set(TINT_BUILD_TESTS OFF)
+  set(DAWN_USE_BUILT_DXC ON)
+  set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF)
+  onnxruntime_fetchcontent_makeavailable(dawn)
+endif()
+
 message(STATUS "Finished fetching external dependencies")
 
 set(onnxruntime_LINK_DIRS )

diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake
@@ -38,7 +38,7 @@ function(get_c_cxx_api_headers HEADERS_VAR)
 
   # need to add header files for enabled EPs
   foreach(f ${ONNXRUNTIME_PROVIDER_NAMES})
-    # The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory 
+    # The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory
     # with onnxruntime_c_api.h . Most other EPs probably also do not work in this way.
     if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm))
       file(GLOB _provider_headers CONFIGURE_DEPENDS
@@ -200,6 +200,7 @@ set(onnxruntime_INTERNAL_LIBRARIES
   ${PROVIDERS_RKNPU}
   ${PROVIDERS_VSINPU}
   ${PROVIDERS_XNNPACK}
+  ${PROVIDERS_WEBGPU}
   ${PROVIDERS_WEBNN}
   ${PROVIDERS_AZURE}
   ${PROVIDERS_INTERNAL_TESTING}

diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
@@ -110,6 +110,9 @@ endif()
 if(onnxruntime_USE_WEBNN)
   set(PROVIDERS_WEBNN onnxruntime_providers_webnn)
 endif()
+if(onnxruntime_USE_WEBGPU)
+  set(PROVIDERS_WEBGPU onnxruntime_providers_webgpu)
+endif()
 if (onnxruntime_USE_CANN)
   set(PROVIDERS_CANN onnxruntime_providers_cann)
 endif()
@@ -151,6 +154,10 @@ if (onnxruntime_USE_WEBNN)
   include(onnxruntime_providers_webnn.cmake)
 endif()
 
+if (onnxruntime_USE_WEBGPU)
+  include(onnxruntime_providers_webgpu.cmake)
+endif()
+
 if (onnxruntime_USE_NNAPI_BUILTIN)
   include(onnxruntime_providers_nnapi.cmake)
 endif()

diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake
@@ -40,6 +40,11 @@ file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
 )
 
+file(GLOB_RECURSE onnxruntime_webgpu_contrib_ops_cc_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.h"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.cc"
+)
+
 file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/core/providers/*.h"
   "${ONNXRUNTIME_ROOT}/core/providers/*.cc"
@@ -60,7 +65,7 @@ if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
       "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/aten_ops/aten_op_executor.cc"
     )
   endif()
-  set(onnxruntime_cpu_neural_speed_srcs 
+  set(onnxruntime_cpu_neural_speed_srcs
     "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_wrapper.h"
     "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_defs.h"
     "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/quantization/neural_speed_gemm.cc"

diff --git a/cmake/onnxruntime_providers_webgpu.cmake b/cmake/onnxruntime_providers_webgpu.cmake
@@ -0,0 +1,37 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  # Defines the onnxruntime_providers_webgpu static library, which links against Dawn (dawn::webgpu_dawn).
+  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
+    message(FATAL_ERROR "WebGPU EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
+  endif()
+
+  add_compile_definitions(USE_WEBGPU=1)
+  if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+    add_compile_definitions(ENABLE_WEBASSEMBLY_THREADS=1)
+  endif()
+  file(GLOB_RECURSE onnxruntime_providers_webgpu_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.cc"
+  )
+  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
+    # onnxruntime_webgpu_contrib_ops_cc_srcs is globbed in onnxruntime_providers_cpu.cmake.
+    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_webgpu_contrib_ops_cc_srcs})
+    list(APPEND onnxruntime_providers_webgpu_cc_srcs ${onnxruntime_webgpu_contrib_ops_cc_srcs})
+  endif()
+
+  source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_webgpu onnxruntime_common onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+  target_link_libraries(onnxruntime_providers_webgpu PUBLIC dawn::webgpu_dawn)
+  # Same ordering dependency the WebNN provider declares; presumably ensures external deps are built first.
+  add_dependencies(onnxruntime_providers_webgpu onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+
+  # Copy the Dawn shared library (e.g. webgpu_dawn.dll) to the provider's output directory
+  add_custom_command(
+    TARGET onnxruntime_providers_webgpu
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different "$<TARGET_FILE:dawn::webgpu_dawn>" "$<TARGET_FILE_DIR:onnxruntime_providers_webgpu>"
+    VERBATIM )
+
+  set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime")
diff --git a/cmake/onnxruntime_providers_webnn.cmake b/cmake/onnxruntime_providers_webnn.cmake
@@ -22,4 +22,4 @@
 
   add_dependencies(onnxruntime_providers_webnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
   set_target_properties(onnxruntime_providers_webnn PROPERTIES FOLDER "ONNXRuntime")
-  set_target_properties(onnxruntime_providers_webnn PROPERTIES LINKER_LANGUAGE CXX)
+  set_target_properties(onnxruntime_providers_webnn PROPERTIES LINKER_LANGUAGE CXX)
diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake
@@ -180,6 +180,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE
     ${PROVIDERS_ACL}
     ${PROVIDERS_ARMNN}
     ${PROVIDERS_XNNPACK}
+    ${PROVIDERS_WEBGPU}
     ${PROVIDERS_AZURE}
     ${PROVIDERS_QNN}
     onnxruntime_optimizer

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -557,6 +557,10 @@ if(onnxruntime_USE_JSEP)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
 endif()
 
+if(onnxruntime_USE_WEBGPU)
+  list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
+endif()
+
 if(onnxruntime_USE_RKNPU)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu)
 endif()
@@ -598,6 +602,7 @@ set(ONNXRUNTIME_TEST_LIBS
     ${PROVIDERS_NNAPI}
     ${PROVIDERS_VSINPU}
     ${PROVIDERS_JS}
+    ${PROVIDERS_WEBGPU}
     ${PROVIDERS_QNN}
     ${PROVIDERS_SNPE}
     ${PROVIDERS_RKNPU}
@@ -658,6 +663,13 @@ if(onnxruntime_USE_JSEP)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js)
 endif()
 
+if(onnxruntime_USE_WEBGPU)
+  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/webgpu/*)
+  list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_webgpu)
+  list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
+  list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_webgpu)
+endif()
+
 # QNN EP tests require CPU EP op implementations for accuracy evaluation, so disable on minimal
 # or reduced op builds.
 if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
@@ -1112,6 +1124,22 @@ if (NOT IOS)
             LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
             BUNDLE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
             RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+    ## TODO: remove this when merging to main branch
+    #
+    #        should support better test runner
+    #
+    if (onnxruntime_USE_WEBGPU)
+      add_custom_command(
+        TARGET onnx_test_runner
+        POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        "${ONNXRUNTIME_ROOT}/test/providers/webgpu/test_webgpu.js"
+        "${ONNXRUNTIME_ROOT}/test/providers/webgpu/test_webgpu.bat"
+        "$<TARGET_FILE_DIR:onnx_test_runner>"
+        VERBATIM )
+    endif()
+
 endif()
 
 if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)

diff --git a/include/onnxruntime/core/graph/constants.h b/include/onnxruntime/core/graph/constants.h
@@ -50,6 +50,7 @@ constexpr const char* kSnpeExecutionProvider = "SNPEExecutionProvider";
 constexpr const char* kTvmExecutionProvider = "TvmExecutionProvider";
 constexpr const char* kXnnpackExecutionProvider = "XnnpackExecutionProvider";
 constexpr const char* kWebNNExecutionProvider = "WebNNExecutionProvider";
+constexpr const char* kWebGpuExecutionProvider = "WebGpuExecutionProvider";
 constexpr const char* kCannExecutionProvider = "CANNExecutionProvider";
 constexpr const char* kAzureExecutionProvider = "AzureExecutionProvider";
 constexpr const char* kVSINPUExecutionProvider = "VSINPUExecutionProvider";

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -624,6 +624,38 @@ typedef struct OrtMIGraphXProviderOptions {
   bool migraphx_exhaustive_tune;                     // migraphx tuned compile  Default = false
 } OrtMIGraphXProviderOptions;
 
+/** \brief WebGPU Execution Provider Options
+ *
+ * When a user wants to use WebGPU as the execution provider, there are 2 ways to specify the WebGPU device:
+ *
+ * 1. Use the default WebGPU device. The default WebGPU device is managed by WebGPU EP internally. The user doesn't
+ *    need to provide any device information in this case. All the fields should be set to nullptr or 0.
+ *
+ * 2. Use a custom WebGPU device. The user should create their own handles of `WGPUInstance`, `WGPUAdapter`, and
+ *    `WGPUDevice` and use an arbitrary number in the range [1, 65536) as the device id. The user should provide
+ *    the handles and the device id in the options.
+ *
+ *    When specifying an existing device id, the user should provide the handles of `WGPUInstance`, `WGPUAdapter`,
+ *    and `WGPUDevice` in the options. The device id should be the same as the one used previously.
+ *
+ *    It is the user's responsibility to manage the lifecycle of the handles and ensure the handles are valid during
+ *    the lifetime of the inference session.
+ *
+ * About DawnProcTable:
+ *
+ * When using an ONNX Runtime build that was not directly linked against Dawn at build time, a pointer to the runtime
+ * memory address of the DawnProcTable should be provided. Otherwise, keep it as nullptr.
+ *
+ * \see OrtApi::SessionOptionsAppendExecutionProvider_WGPU
+ */
+typedef struct OrtWGPUProviderOptions {
+  int device_id;          // WebGPU device id: 0 for the default device, or a custom id in [1, 65536).
+  void* instance_handle;  // `WGPUInstance` handle for a custom device; nullptr for the default device.
+  void* adapter_handle;   // `WGPUAdapter` handle for a custom device; nullptr for the default device.
+  void* device_handle;    // `WGPUDevice` handle for a custom device; nullptr for the default device.
+  void* dawn_proc_table;  // Pointer to the DawnProcTable, or nullptr (see "About DawnProcTable" above).
+} OrtWGPUProviderOptions;
+
 /** \brief OpenVINO Provider Options
  *
  * \see OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO
@@ -4670,6 +4702,37 @@ struct OrtApi {
                   _In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array,
                   _In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths,
                   size_t num_external_initializer_files);
+
+  /** \brief Append WebGPU execution provider to session options
+   *
+   * If WebGPU is not available, this function will return failure.
+   *
+   * \param[in] options
+   * \param[in] wgpu_options - specify the WebGPU provider options.
+   * \param[in] string_options_keys - keys to configure the string options
+   * \param[in] string_options_values - values to configure the string options
+   * \param[in] num_keys - number of keys passed in
+   *
+   * The supported keys are listed below. All entries are optional.
+   *
+   * | Key                            | Possible Values                                | Default Value  |
+   * | ------------------------------ | ---------------------------------------------- | -------------- |
+   * | "preferredLayout"              | "NHWC" or "NCHW"                               | "NHWC"         |
+   * | "enableGraphCapture"           | "1" or "0"                                     | "0"            |
+   * | "storageBufferCacheMode"       | "disabled", "lazyRelease", "simple", "bucket"  | "bucket"       |
+   * | "uniformBufferCacheMode"       | "disabled", "lazyRelease", "simple", "bucket"  | "lazyRelease"  |
+   * | "queryResolveBufferCacheMode"  | "disabled", "lazyRelease", "simple", "bucket"  | "disabled"     |
+   * | "defaultBufferCacheMode"       | "disabled", "lazyRelease", "simple", "bucket"  | "disabled"     |
+   *
+   * \snippet{doc} snippets.dox OrtStatus Return Value
+   *
+   * \since Version 1.20.
+   */
+  ORT_API2_STATUS(SessionOptionsAppendExecutionProvider_WGPU,
+                  _In_ OrtSessionOptions* options, _In_ const OrtWGPUProviderOptions* wgpu_options,
+                  _In_reads_(num_keys) const char* const* string_options_keys,
+                  _In_reads_(num_keys) const char* const* string_options_values,
+                  _In_ size_t num_keys);
 };
 
 /*

diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -890,6 +890,9 @@
   SessionOptionsImpl& AppendExecutionProvider_CANN(const OrtCANNProviderOptions& provider_options);
   ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_Dnnl
   SessionOptionsImpl& AppendExecutionProvider_Dnnl(const OrtDnnlProviderOptions& provider_options);
+  ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_WGPU
+  SessionOptionsImpl& AppendExecutionProvider_WGPU(const OrtWGPUProviderOptions& wgpu_options,
+                                                   const std::unordered_map<std::string, std::string>& string_options = {});
   /// Wraps OrtApi::SessionOptionsAppendExecutionProvider. Currently supports QNN, SNPE and XNNPACK.
   SessionOptionsImpl& AppendExecutionProvider(const std::string& provider_name,
                                               const std::unordered_map<std::string, std::string>& provider_options = {});

diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -838,6 +838,25 @@
   return *this;
 }
 
+template <typename T>
+inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_WGPU(const OrtWGPUProviderOptions& wgpu_options,
+                                                                                  const std::unordered_map<std::string, std::string>& string_options) {
+  // Flatten the map into parallel arrays of C strings; the pointers borrow from `string_options`.
+  const size_t option_count = string_options.size();
+  std::vector<const char*> option_keys;
+  std::vector<const char*> option_values;
+  option_keys.reserve(option_count);
+  option_values.reserve(option_count);
+  for (const auto& kv : string_options) {
+    option_keys.push_back(kv.first.c_str());
+    option_values.push_back(kv.second.c_str());
+  }
+
+  ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_WGPU(this->p_, &wgpu_options, option_keys.data(),
+                                                                   option_values.data(), option_count));
+  return *this;
+}
+
 template <typename T>
 inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_CANN(const OrtCANNProviderOptions& provider_options) {
   ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_CANN(this->p_, &provider_options));