diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index 36436ec368ffa..102dce0737296 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -43,6 +43,7 @@ Do not modify directly.*
 |||[9, 13]|**T** = tensor(double), tensor(float)|
 |||[7, 8]|**T** = tensor(double), tensor(float)|
 |BitShift|*in* X:**T**<br> *in* Y:**T**<br> *out* Z:**T**|11+|**T** = tensor(uint32), tensor(uint64), tensor(uint8)|
+|BlackmanWindow|*in* size:**T1**<br> *out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)<br/> **T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |Cast|*in* input:**T1**<br> *out* output:**T2**|13+|**T1** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br/> **T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |||[6, 12]|**T1** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br/> **T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |Ceil|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(float)|
@@ -69,6 +70,7 @@ Do not modify directly.*
 |Crop|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(float)|
 |CumSum|*in* x:**T**<br> *in* axis:**T2**<br> *out* y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
 |||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
+|DFT|*in* input:**T1**<br> *in* dft_length:**T2**<br> *out* output:**T1**|17+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
 |DepthToSpace|*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(double), tensor(float)|
 |||[11, 12]|**T** = tensor(double), tensor(float)|
 |||[1, 10]|**T** = tensor(double), tensor(float)|
@@ -125,6 +127,8 @@ Do not modify directly.*
 |GreaterOrEqual|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|16+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T1** = tensor(bool)|
 |||[12, 15]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T1** = tensor(bool)|
 |GridSample|*in* X:**T1**<br> *in* grid:**T1**<br> *out* Y:**T2**|16+|**T1** = tensor(float)<br/> **T2** = tensor(float)|
+|HammingWindow|*in* size:**T1**<br> *out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)<br/> **T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
+|HannWindow|*in* size:**T1**<br> *out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)<br/> **T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |HardSigmoid|*in* X:**T**<br> *out* Y:**T**|6+|**T** = tensor(float)|
 |Hardmax|*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(float)|
 |||[11, 12]|**T** = tensor(float)|
@@ -186,6 +190,7 @@ Do not modify directly.*
 |MeanVarianceNormalization|*in* X:**T**<br> *out* Y:**T**<br><br>or<br><br>*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(float)|
 |||[9, 12]|**T** = tensor(float)|
 |||[1, 8]|**T** = tensor(float)|
+|MelWeightMatrix|*in* num_mel_bins:**T1**<br> *in* dft_length:**T1**<br> *in* sample_rate:**T1**<br> *in* lower_edge_hertz:**T2**<br> *in* upper_edge_hertz:**T2**<br> *out* output:**T3**|17+|**T1** = tensor(int32), tensor(int64)<br/> **T2** = tensor(float)<br/> **T3** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |Min|*in* data_0:**T**<br> *out* min:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |||12|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |||[8, 11]|**T** = tensor(double), tensor(float)|
@@ -277,6 +282,7 @@ Do not modify directly.*
 |RoiAlign|*in* X:**T1**<br> *in* rois:**T1**<br> *in* batch_indices:**T2**<br> *out* Y:**T1**|16+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int64)|
 |||[10, 15]|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int64)|
 |Round|*in* X:**T**<br> *out* Y:**T**|11+|**T** = tensor(double), tensor(float), tensor(float16)|
+|STFT|*in* signal:**T1**<br> *in* frame_step:**T2**<br> *in* window:**T1**<br> *in* frame_length:**T2**<br> *out* output:**T1**|17+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
 |Scale|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(float)|
 |ScaledTanh|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(float)|
 |Scan|*in* initial_state_and_scan_inputs:**V**<br> *out* final_state_and_scan_outputs:**V**<br><br>or<br><br>*in* sequence_lens:**I**<br> *in* initial_state_and_scan_inputs:**V**<br> *out* final_state_and_scan_outputs:**V**|16+|**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
index 2068b3c3e3f1f..d89d30b62c737 100644
--- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
+++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
@@ -41,16 +41,6 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FastG
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, NGramRepeatBlock);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BifurcationDetector);
 
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, DFT);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, IDFT);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, HannWindow);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, HammingWindow);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, BlackmanWindow);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, MelWeightMatrix);
-class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, STFT);
-#endif
-
 // ******** Start: Quantization ******************* //
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulInteger16);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearGlobalAveragePool);
@@ -224,16 +214,6 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FastGelu)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, NGramRepeatBlock)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BifurcationDetector)>,
-
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, DFT)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, IDFT)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, HannWindow)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, HammingWindow)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, BlackmanWindow)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, MelWeightMatrix)>,
-    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, STFT)>,
-#endif
     // These ops were experimental ops in onnx domain which have been removed now. We add them here as
     // contrib ops to main backward compatibility
     BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Affine)>,
diff --git a/onnxruntime/contrib_ops/cpu/signal/dft.cc b/onnxruntime/contrib_ops/cpu/signal/dft.cc
deleted file mode 100644
index 87aac44976540..0000000000000
--- a/onnxruntime/contrib_ops/cpu/signal/dft.cc
+++ /dev/null
@@ -1,606 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
-
-#include "core/providers/common.h"
-#include "core/framework/op_kernel.h"
-#include "core/util/math_cpuonly.h"
-#include "Eigen/src/Core/Map.h"
-#include "dft.h"
-#include <functional>
-
-#include "core/platform/threadpool.h"
-
-#include <complex>
-#include <cmath>
-
-namespace onnxruntime {
-namespace contrib {
-
-ONNX_OPERATOR_KERNEL_EX(
-    DFT,
-    kMSExperimentalDomain,
-    1,
-    kCpuExecutionProvider,
-    KernelDefBuilder().TypeConstraint("T1", BuildKernelDefConstraints<float, double>())
-                      .TypeConstraint("T2", BuildKernelDefConstraints<int32_t, int64_t>()),
-    DFT);
-
-ONNX_OPERATOR_KERNEL_EX(
-    IDFT,
-    kMSExperimentalDomain,
-    1,
-    kCpuExecutionProvider,
-    KernelDefBuilder().TypeConstraint("T1", BuildKernelDefConstraints<float, double>())
-                      .TypeConstraint("T2", BuildKernelDefConstraints<int64_t>()),
-    IDFT);
-
-ONNX_OPERATOR_KERNEL_EX(
-    STFT,
-    kMSExperimentalDomain,
-    1,
-    kCpuExecutionProvider,
-    KernelDefBuilder().MayInplace(0, 0).TypeConstraint("T1", BuildKernelDefConstraints<float, double>())
-                                       .TypeConstraint("T2", BuildKernelDefConstraints<int32_t, int64_t>()),
-    STFT);
-
-// dedupe with the other one in window_functions.cc
-template <typename T>
-static T get_scalar_value_from_tensor(const Tensor* tensor) {
-  ORT_ENFORCE(tensor->Shape().Size() == 1, "ratio input should have a single value.");
-
-  auto data_type = tensor->DataType()->AsPrimitiveDataType()->GetDataType();
-  switch (data_type) {
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-      return static_cast<T>(*reinterpret_cast<const float*>(tensor->DataRaw()));
-    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
-      return static_cast<T>(*reinterpret_cast<const double*>(tensor->DataRaw()));
-    case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-      return static_cast<T>(*reinterpret_cast<const int32_t*>(tensor->DataRaw()));
-    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-      return static_cast<T>(*reinterpret_cast<const int64_t*>(tensor->DataRaw()));
-    default:
-      ORT_THROW("Unsupported input data type of ", data_type);
-  }
-}
-
-static bool is_real_valued_signal(const onnxruntime::TensorShape & shape) {
-  return shape.NumDimensions() == 2 || shape[shape.NumDimensions() - 1] == 1;
-}
-
-static bool is_complex_valued_signal(const onnxruntime::TensorShape& shape) {
-  return shape.NumDimensions() > 2 && shape[shape.NumDimensions() - 1] == 2;
-}
-
-static bool is_power_of_2(size_t size) {
-  unsigned n_bits = 0;
-  while (size != 0) {
-    n_bits += size & 1;
-    size = size >> 1;
-  }
-  return n_bits == 1;
-}
-
-static const unsigned char BitReverseTable256[] =
-{
-    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
-    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
-    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
-    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
-    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
-    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
-    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
-    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
-    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
-    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
-    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
-    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
-    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
-    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
-    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
-    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF};
-
-template <unsigned TSignificantBits>
-uint32_t bit_reverse(uint32_t num) {
-  uint32_t rev = (BitReverseTable256[num & 0xff] << 24) |
-         (BitReverseTable256[(num >> 8) & 0xff] << 16) |
-         (BitReverseTable256[(num >> 16) & 0xff] << 8) |
-         (BitReverseTable256[(num >> 24) & 0xff]);
-  return static_cast<uint32_t>(((uint64_t)rev) >> (32 - TSignificantBits));
-}
-
-template <typename T>
-static inline T bit_reverse(T num, unsigned significant_bits) {
-  switch (significant_bits) {
-    case 0: return static_cast<T>(bit_reverse<0>(static_cast<uint32_t>(num)));
-    case 1: return static_cast<T>(bit_reverse<1>(static_cast<uint32_t>(num)));
-    case 2: return static_cast<T>(bit_reverse<2>(static_cast<uint32_t>(num)));
-    case 3: return static_cast<T>(bit_reverse<3>(static_cast<uint32_t>(num)));
-    case 4: return static_cast<T>(bit_reverse<4>(static_cast<uint32_t>(num)));
-    case 5: return static_cast<T>(bit_reverse<5>(static_cast<uint32_t>(num)));
-    case 6: return static_cast<T>(bit_reverse<6>(static_cast<uint32_t>(num)));
-    case 7: return static_cast<T>(bit_reverse<7>(static_cast<uint32_t>(num)));
-    case 8: return static_cast<T>(bit_reverse<8>(static_cast<uint32_t>(num)));
-    case 9: return static_cast<T>(bit_reverse<9>(static_cast<uint32_t>(num)));
-    case 10: return static_cast<T>(bit_reverse<10>(static_cast<uint32_t>(num)));
-    case 11: return static_cast<T>(bit_reverse<11>(static_cast<uint32_t>(num)));
-    case 12: return static_cast<T>(bit_reverse<12>(static_cast<uint32_t>(num)));
-    case 13: return static_cast<T>(bit_reverse<13>(static_cast<uint32_t>(num)));
-    case 14: return static_cast<T>(bit_reverse<14>(static_cast<uint32_t>(num)));
-    case 15: return static_cast<T>(bit_reverse<15>(static_cast<uint32_t>(num)));
-    case 16: return static_cast<T>(bit_reverse<16>(static_cast<uint32_t>(num)));
-    case 17: return static_cast<T>(bit_reverse<17>(static_cast<uint32_t>(num)));
-    case 18: return static_cast<T>(bit_reverse<18>(static_cast<uint32_t>(num)));
-    case 19: return static_cast<T>(bit_reverse<19>(static_cast<uint32_t>(num)));
-    case 20: return static_cast<T>(bit_reverse<20>(static_cast<uint32_t>(num)));
-    case 21: return static_cast<T>(bit_reverse<21>(static_cast<uint32_t>(num)));
-    case 22: return static_cast<T>(bit_reverse<22>(static_cast<uint32_t>(num)));
-    case 23: return static_cast<T>(bit_reverse<23>(static_cast<uint32_t>(num)));
-    case 24: return static_cast<T>(bit_reverse<24>(static_cast<uint32_t>(num)));
-    case 25: return static_cast<T>(bit_reverse<25>(static_cast<uint32_t>(num)));
-    case 26: return static_cast<T>(bit_reverse<26>(static_cast<uint32_t>(num)));
-    case 27: return static_cast<T>(bit_reverse<27>(static_cast<uint32_t>(num)));
-    case 28: return static_cast<T>(bit_reverse<28>(static_cast<uint32_t>(num)));
-    case 29: return static_cast<T>(bit_reverse<29>(static_cast<uint32_t>(num)));
-    case 30: return static_cast<T>(bit_reverse<30>(static_cast<uint32_t>(num)));
-    case 31: return static_cast<T>(bit_reverse<31>(static_cast<uint32_t>(num)));
-    case 32: return static_cast<T>(bit_reverse<32>(static_cast<uint32_t>(num)));
-    default: ORT_THROW("Unsupported bit size.");
-  }
-}
-
-template <typename T>
-static T compute_angular_velocity(size_t number_of_samples, bool inverse) {
-  // Calculate fundamental angular velocity
-  static const T pi = static_cast<T>(3.14159265);
-  static const T tau = 2 * pi;
-  T inverse_switch = inverse ? 1.f : -1.f;
-  T angular_velocity = inverse_switch * tau / number_of_samples;
-  return angular_velocity;
-}
-
-template <typename T, typename U>
-static Status fft_radix2(OpKernelContext* /*ctx*/,
-    const Tensor* X, Tensor* Y,
-    size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis, size_t dft_length,
-    const Tensor* window, bool is_onesided, bool inverse,
-    std::vector<std::complex<T>>& V,
-    std::vector<std::complex<T>>& temp_output) {
-
-  // Get shape and significant bits
-  const auto& X_shape = X->Shape();
-  size_t number_of_samples = static_cast<size_t>(X_shape[axis]);
-  unsigned significant_bits = static_cast<unsigned>(log2(dft_length));
-
-  // Get data
-  auto* X_data = const_cast<U*>(reinterpret_cast<const U*>(X->DataRaw())) + X_offset;
-  // Get window
-  U* window_data = nullptr;
-  if (window) {
-    window_data = const_cast<U*>(reinterpret_cast<const U*>(window->DataRaw()));
-  }
-
-  size_t Y_data_stride = 1;
-  std::complex<T>* Y_data;
-  if (is_onesided) {
-    if (temp_output.size() != dft_length) {
-      temp_output = std::vector<std::complex<T>>(dft_length);
-    }
-    Y_data = temp_output.data();
-  } else {
-    Y_data = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
-    Y_data_stride = Y_stride;
-  }
-
-  auto angular_velocity = compute_angular_velocity<T>(dft_length, inverse);
-
-  // Create vandermonde matrix V ordered with the bit-reversed permutation
-  if (V.size() != dft_length) {
-    V = std::vector<std::complex<T>>(dft_length);  // e^(i *2*pi / N * k)
-    for (size_t i = 0; i < dft_length; i++) {
-      size_t bit_reversed_index = bit_reverse(i, significant_bits);
-      V[bit_reversed_index] = std::complex<T>(cos(i * angular_velocity), sin(i * angular_velocity));
-    }
-  }
-
-  for (size_t i = 0; i < dft_length; i++) {
-    size_t bit_reversed_index = bit_reverse(i, significant_bits);
-    auto x = (bit_reversed_index < number_of_samples) ?  * (X_data + bit_reversed_index * X_stride) : 0;
-    auto window_element = window_data ? *(window_data + bit_reversed_index) : 1;
-    *(Y_data + i*Y_data_stride) = std::complex<T>(1, 0) * x * window_element;
-  }
-
-  // Run fft_radix2
-  unsigned current_significant_bits = 0;
-  for (size_t i = 2; i <= dft_length; i <<= 1) {
-    size_t midpoint = i >> 1;
-    current_significant_bits++;
-
-    for (size_t k = 0; k < midpoint; k++) {
-      auto first_idx = bit_reverse(k, current_significant_bits);
-      auto second_idx = bit_reverse(midpoint + k, current_significant_bits);
-      for (size_t j = 0; j < dft_length; j += i) {
-        auto even_index = k + j;
-        auto odd_index  = k + j + midpoint;
-        std::complex<T>* even = (Y_data + even_index * Y_data_stride);
-        std::complex<T>* odd = (Y_data + odd_index * Y_data_stride);
-        std::complex<T> first = *even + (V[first_idx] * *odd);
-        std::complex<T> second = *even + (V[second_idx] * *odd);
-        *even = first;
-        *odd = second;
-      }
-    }
-  }
-
-  // Scale the output if inverse
-  if (inverse) {
-    for (size_t i = 0; i < dft_length; i++) {
-      std::complex<T>& val = *(Y_data + i * Y_data_stride);
-      val /= static_cast<T>(dft_length);
-    }
-  }
-
-  if (is_onesided) {
-    auto destination = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
-    for (size_t i = 0; i < dft_length; i++) {
-      *(destination + Y_stride * i) = *(Y_data + i * Y_data_stride);
-    }
-  }
-
-  return Status::OK();
-}
-
-template <typename T, typename U>
-static Status dft_naive(const Tensor* X, Tensor* Y,
-  size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis,
-  size_t dft_length, const Tensor* window, bool inverse) {
-  // Get shape and significant bits
-  const auto& X_shape = X->Shape();
-  size_t number_of_samples = static_cast<size_t>(X_shape[axis]);
-  const auto& Y_shape = Y->Shape();
-  size_t dft_output_size = static_cast<size_t>(Y_shape[axis]);
-
-  // Get data
-  auto* X_data = const_cast<U*>(reinterpret_cast<const U*>(X->DataRaw())) + X_offset;
-  auto* Y_data = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
-
-  U* window_data = nullptr;
-  if (window) {
-    window_data = const_cast<U*>(reinterpret_cast<const U*>(window->DataRaw()));
-  }
-
-  auto angular_velocity = compute_angular_velocity<T>(dft_length, inverse);
-
-  for (size_t i = 0; i < dft_output_size; i++) {
-    std::complex<T>& out = *(Y_data + i*Y_stride);
-    out.real(0);
-    out.imag(0);
-
-    for (size_t j = 0; j < dft_length; j++) {  // vectorize over this loop
-      auto exponential = std::complex<T>(cos(i * j * angular_velocity), sin(i * j * angular_velocity));
-      auto window_element = window_data ? * (window_data + j) : 1;
-      auto x = (j < number_of_samples) ? *(X_data + j * X_stride) : 0;
-      auto element = x * window_element;
-      out += exponential * element;
-    }
-
-    if (inverse) {
-      out /= static_cast<T>(dft_length);
-    }
-  }
-
-  return Status::OK();
-}
-
-template <typename T, typename U>
-static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis, int64_t dft_length, const Tensor* window, bool is_onesided, bool inverse,
-                                         std::vector<std::complex<T>>& V, std::vector<std::complex<T>>& temp_output) {
-  // Get shape
-  const auto& X_shape = X->Shape();
-  const auto& Y_shape = Y->Shape();
-
-  auto batch_and_signal_rank = X->Shape().NumDimensions();
-  auto total_dfts = static_cast<size_t>(X->Shape().Size() / X->Shape()[axis]);
-
-  auto is_input_real = X->Shape().NumDimensions() == 2 || X->Shape()[X->Shape().NumDimensions() - 1] == 1;
-  auto complex_input_factor = is_input_real ? 1 : 2;
-  if (X->Shape().NumDimensions() > 2)
-  {
-    total_dfts /= X->Shape()[X->Shape().NumDimensions() - 1];
-    batch_and_signal_rank -= 1;
-  }
-
-  // Calculate x/y offsets/strides
-  for (size_t i = 0; i < total_dfts; i++)
-  {
-    size_t X_offset = 0;
-    size_t X_stride = X_shape.SizeFromDimension(axis+1) / complex_input_factor;
-    size_t cumulative_packed_stride = total_dfts;
-    size_t temp = i;
-    for (size_t r = 0; r < batch_and_signal_rank; r++) {
-      if (r == static_cast<size_t>(axis))
-      {
-        continue;
-      }
-      cumulative_packed_stride /= X_shape[r];
-      auto index = temp / cumulative_packed_stride;
-      temp -= (index * cumulative_packed_stride);
-      X_offset += index * X_shape.SizeFromDimension(r + 1) / complex_input_factor;
-    }
-
-    size_t Y_offset = 0;
-    size_t Y_stride = Y_shape.SizeFromDimension(axis + 1) / 2;
-    cumulative_packed_stride = total_dfts;
-    temp = i;
-    for (size_t r = 0; r < batch_and_signal_rank; r++) {
-      if (r == static_cast<size_t>(axis))
-      {
-        continue;
-      }
-      cumulative_packed_stride /= X_shape[r];
-      auto index = temp / cumulative_packed_stride;
-      temp -= (index * cumulative_packed_stride);
-      Y_offset += index * Y_shape.SizeFromDimension(r + 1) / 2;
-    }
-
-    if (is_power_of_2(dft_length)) {
-      ORT_RETURN_IF_ERROR((fft_radix2<T, U>(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, is_onesided, inverse, V, temp_output)));
-    } else {
-      ORT_RETURN_IF_ERROR((dft_naive<T, U>(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, inverse)));
-    }
-  }
-
-  return Status::OK();
-}
-
-static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, bool is_onesided, bool inverse) {
-  // Get input shape
-  const auto* X = ctx->Input<Tensor>(0);
-  const auto* dft_length = ctx->Input<Tensor>(1);
-  const auto& X_shape = X->Shape();
-  const auto is_real_valued = is_real_valued_signal(X_shape);
-  const auto is_complex_valued = is_complex_valued_signal(X_shape);
-
-  // Get the rank of the input tensor
-  // Ensure that the axis is in the valid range of [-rank, rank)
-  auto rank = static_cast<int64_t>(X_shape.GetDims().size());
-  if (!(-rank <= axis && axis < rank)) {
-    ORT_RETURN_IF(!(-rank <= axis && axis < rank),
-                  "axis attribute value ",
-                  axis,
-                  " is invalid for a tensor of rank ",
-                  rank);
-  }
-  axis = (axis >= 0 ? axis : axis + rank);
-
-  int64_t number_of_samples = static_cast<int64_t>(X_shape[axis]);
-  if (dft_length) {
-    const auto& dft_length_shape = dft_length->Shape();
-    ORT_RETURN_IF(!dft_length_shape.IsScalar(), "dft_length must be a scalar value.");
-    number_of_samples = static_cast<int>(get_scalar_value_from_tensor<int64_t>(dft_length));
-    ORT_RETURN_IF(number_of_samples <= 0, "dft_length must be greater than zero.");
-  }
-
-  // Get the DFT output size. Onesided will return only the unique values!
-  // note: x >> 1 === std::floor(x / 2.f)
-  auto dft_output_size = is_onesided ?
-      ((number_of_samples >> 1) + 1) :
-      number_of_samples;
-
-  // Get output shape
-  auto Y_shape = onnxruntime::TensorShape(X_shape);
-  if (X_shape.NumDimensions() == 2)
-  {
-    Y_shape = onnxruntime::TensorShape({X_shape[0], dft_output_size, 2});
-  } else
-  {
-    Y_shape[Y_shape.NumDimensions() - 1] = 2;
-  }
-  Y_shape[axis] = dft_output_size;
-  auto Y = ctx->Output(0, Y_shape);
-
-  // Get data type
-  auto data_type = X->DataType();
-
-  auto element_size = data_type->Size();
-  if (element_size == sizeof(float)) {
-    std::vector<std::complex<float>> V;
-    std::vector<std::complex<float>> temp_output;
-    if (is_real_valued) {
-      ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, float>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
-    } else if (is_complex_valued) {
-      ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, std::complex<float>>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
-    } else {
-        ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type);
-    }
-  } else if (element_size == sizeof(double)) {
-    std::vector<std::complex<double>> V;
-    std::vector<std::complex<double>> temp_output;
-    if (is_real_valued) {
-      ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, double>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
-    } else if (is_complex_valued) {
-      ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, std::complex<double>>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
-    } else {
-      ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type);
-    }
-  } else {
-    ORT_THROW("Unsupported input data type of ", data_type);
-  }
-
-  return Status::OK();
-}
-
-Status DFT::Compute(OpKernelContext* ctx) const {
-  ORT_RETURN_IF_ERROR(
-    discrete_fourier_transform(ctx,
-                               axis_,
-                               is_onesided_,
-                               is_inverse_));
-  return Status::OK();
-}
-
-Status IDFT::Compute(OpKernelContext* ctx) const {
-  ORT_RETURN_IF_ERROR(
-    discrete_fourier_transform(ctx,
-                               axis_,
-                               false /*is_onesided_*/,
-                               true /*is_inverse_*/));
-  return Status::OK();
-}
-
-template <typename T, typename U>
-static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_onesided, bool /*inverse*/) {
-  // Attr("onesided"): default = 1
-  // Input(0, "signal") type = T1
-  // Input(1, "frame_length") type = T2
-  // Input(2, "window") type = T1, optional
-  // Input(3, "frame_step") type = T2
-  // Output(0, "output") type = T1
-
-  // Get signal
-  const auto* signal = ctx->Input<Tensor>(0);
-  const auto frame_step = get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(1));
-  const auto* window = ctx->Input<Tensor>(2);
-  const auto* frame_length_tensor = ctx->Input<Tensor>(3);
-
-  // Get input signal shape
-  const auto& signal_shape = signal->Shape();
-  const auto batch_size = signal_shape[0];
-  const auto signal_size = signal_shape[1];
-  const auto signal_components =
-      signal_shape.NumDimensions() == 2 ? 1 : signal_shape.NumDimensions() == 3 ? signal_shape[2] : 0;  // error
-  ORT_ENFORCE(signal_components == 1 || signal_components == 2, "Ensure that the signal has either 1 or 2 components.");
-
-  // Get the frame length
-  int64_t frame_length = std::numeric_limits<int64_t>::min();
-  if (frame_length_tensor)
-  {
-    frame_length = get_scalar_value_from_tensor<int64_t>(frame_length_tensor);
-  }
-
-  // Get window length
-  int64_t window_length = std::numeric_limits<int64_t>::min();
-   if (window) {
-    window_length = window->Shape()[0];
-  }
-
-  // The frame_length and window inputs are generally used interchangably, and should match!
-  if (frame_length != std::numeric_limits<int64_t>::min() &&
-      window_length != std::numeric_limits<int64_t>::min()) {
-    ORT_ENFORCE(frame_length == window_length, "If both frame_length and window are set, then the size of the window must be equal to the frame_length.");
-  }
-
-  // Calculate the window size with preference to the window input.
-  const auto window_size = window ? window->Shape()[0] : frame_length;
-  ORT_ENFORCE(window_size < signal_size, "Ensure that the dft size is smaller than the signal.");
-
-  // Calculate the number of dfts to run
-  const auto n_dfts = static_cast<int64_t>(std::floor((signal_size - window_size) / static_cast<float>(frame_step)) + 1);
-
-  // Calculate the output spectra length (onesided will return only the unique values)
-  // note: x >> 1 === std::floor(x / 2.f)
-  const auto dft_output_size =
-      is_onesided ?
-        (window_size >> 1) + 1 :
-        window_size;
-
-  // Get/create the output mutable data
-  auto output_spectra_shape = onnxruntime::TensorShape({batch_size, n_dfts, dft_output_size, 2});
-  auto Y = ctx->Output(0, output_spectra_shape);
-  auto Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
-
-  // Get/create the signal mutable data
-  auto* signal_data = const_cast<U*>(reinterpret_cast<const U*>(signal->DataRaw()));
-
-  // Define tensor shapes for each dft run
-  const int64_t output_components = 2;
-  auto dft_input_shape = onnxruntime::TensorShape({1, window_size, signal_components});
-  auto dft_output_shape = onnxruntime::TensorShape({1, dft_output_size, output_components});
-
-  std::vector<std::complex<T>> V;
-  std::vector<std::complex<T>> temp_output;
-
-  // Run each dft of each batch as if it was a real-valued batch size 1 dft operation
-  for (int64_t batch_idx = 0; batch_idx < batch_size; batch_idx++) {
-    for (int64_t i = 0; i < n_dfts; i++) {
-      auto input_frame_begin =
-        signal_data +
-        (batch_idx * signal_size * signal_components) +
-        (i * frame_step * signal_components);
-
-      auto output_frame_begin =
-        Y_data +
-        (batch_idx * n_dfts * dft_output_size * output_components) +
-        (i * dft_output_size * output_components);
-
-      // Tensors do not own the backing memory, so no worries on destruction
-      auto input =
-          onnxruntime::Tensor(
-              signal->DataType(),
-              dft_input_shape,
-              input_frame_begin,
-              signal->Location(),
-              0);
-
-      auto output =
-          onnxruntime::Tensor(
-              Y->DataType(),
-              dft_output_shape,
-              output_frame_begin,
-              Y->Location(),
-              0);
-
-      // Run individual dft
-      ORT_RETURN_IF_ERROR((discrete_fourier_transform<T, U>(ctx, &input, &output, 1, window_size, window, is_onesided, false, V, temp_output)));
-    }
-  }
-
-  return Status::OK();
-}
-
-Status STFT::Compute(OpKernelContext* ctx) const {
-  // Attr("onesided"): default = 1
-  // Input(0, "signal") type = T1
-  // Input(1, "frame_length") type = T2
-  // Input(2, "window") type = T1, optional
-  // Input(3, "frame_step") type = T2
-  // Output(0, "output") type = T1
-
-  // Get signal shape
-  const auto* signal = ctx->Input<Tensor>(0);
-  const auto& signal_shape = signal->Shape();
-  const auto is_real_valued = is_real_valued_signal(signal_shape);
-  const auto is_complex_valued = is_complex_valued_signal(signal_shape);
-
-  // Get data type
-  auto data_type = signal->DataType();
-
-  const auto element_size = data_type->Size();
-  if (element_size == sizeof(float)) {
-    if (is_real_valued) {
-      ORT_RETURN_IF_ERROR((short_time_fourier_transform<float, float>(ctx, is_onesided_, false)));
-    } else if (is_complex_valued) {
-      ORT_RETURN_IF_ERROR((short_time_fourier_transform<float, std::complex<float>>(ctx, is_onesided_, false)));
-    } else {
-      ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type);
-    }
-  } else if (element_size == sizeof(double)) {
-    if (is_real_valued) {
-      ORT_RETURN_IF_ERROR((short_time_fourier_transform<double, double>(ctx, is_onesided_, false)));
-    } else if (is_complex_valued) {
-      ORT_RETURN_IF_ERROR((short_time_fourier_transform<double, std::complex<double>>(ctx, is_onesided_, false)));
-    } else {
-      ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type);
-    }
-  } else {
-    ORT_THROW("Unsupported input data type of ", data_type);
-  }
-
-  return Status::OK();
-}
-
-}  // namespace contrib
-}  // namespace onnxruntime
-
-#endif
diff --git a/onnxruntime/contrib_ops/cpu/signal/window_functions.cc b/onnxruntime/contrib_ops/cpu/signal/window_functions.cc
deleted file mode 100644
index 29256adb264d0..0000000000000
--- a/onnxruntime/contrib_ops/cpu/signal/window_functions.cc
+++ /dev/null
@@ -1,334 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
-
-#include "core/providers/common.h"
-#include "core/framework/op_kernel.h"
-#include "core/util/math_cpuonly.h"
-#include "Eigen/src/Core/Map.h"
-#include "window_functions.h"
-#include <functional>
-
-#include "core/platform/threadpool.h"
-
-#include <complex>
-#include <cmath>
-
-namespace onnxruntime {
-namespace contrib {
-
-ONNX_OPERATOR_KERNEL_EX(
-    HannWindow,
-    kMSExperimentalDomain,
-    1,
-    kCpuExecutionProvider,
-    KernelDefBuilder().MayInplace(0, 0)
-        .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())
-        .TypeConstraint("T2", BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>()),
-    HannWindow);
-
-ONNX_OPERATOR_KERNEL_EX(
-    HammingWindow,
-    kMSExperimentalDomain,
-    1,
-    kCpuExecutionProvider,
-    KernelDefBuilder().MayInplace(0, 0)
-        .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())
-        .TypeConstraint("T2", BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>()),
-    HammingWindow);
-
-ONNX_OPERATOR_KERNEL_EX(
-    BlackmanWindow,
-    kMSExperimentalDomain,
-    1,
-    kCpuExecutionProvider,
-    KernelDefBuilder().MayInplace(0, 0)
-        .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())
-        .TypeConstraint("T2", BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>()),
-    BlackmanWindow);
-
-
-ONNX_OPERATOR_KERNEL_EX(
-    MelWeightMatrix,
-    kMSExperimentalDomain,
-    1,
-    kCpuExecutionProvider,
-    KernelDefBuilder().MayInplace(0, 0)
-        .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())
-        .TypeConstraint("T2", BuildKernelDefConstraints<float>())
-        .TypeConstraint("T3", BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>()),
-    MelWeightMatrix);
-
-
-template <typename T>
-static Status cosine_sum_window(Tensor* Y, size_t size, float a0, float a1, float a2) {
-  auto* Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
-
-  // Calculate the radians to increment per sample
-  constexpr double pi = 3.14159265;
-  constexpr double tau = 2 * pi;
-  const double angular_increment = tau / size;
-
-  for (size_t i = 0; i < size; i++) {
-    auto a2_component = a2 == 0 ? 0 : (a2 * cos(2 * angular_increment * i));
-
-    T& value = *(Y_data + i);
-    value = static_cast<T>(a0 - (a1 * cos(angular_increment * i)) + a2_component);
-  }
-
-  return Status::OK();
-}
-
-template <typename T>
-static T get_scalar_value_from_tensor(const Tensor* tensor) {
-  ORT_ENFORCE(tensor->Shape().Size() == 1, "Tensor input should have a single value.");
-  auto data_type = tensor->DataType()->AsPrimitiveDataType()->GetDataType();
-  switch (data_type) {
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-      return static_cast<T>(*reinterpret_cast<const float*>(tensor->DataRaw()));
-    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
-      return static_cast<T>(*reinterpret_cast<const double*>(tensor->DataRaw()));
-    case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-      return static_cast<T>(*reinterpret_cast<const int32_t*>(tensor->DataRaw()));
-    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-      return static_cast<T>(*reinterpret_cast<const int64_t*>(tensor->DataRaw()));
-    default:
-      ORT_THROW("Unsupported input data type of ", data_type);
-  }
-}
-
-static Status create_cosine_sum_window(
-    OpKernelContext* ctx,
-    onnx::TensorProto_DataType output_datatype,
-    float a0, float a1, float a2) {
-
-  // Get the size of the window
-  auto size = get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(0));
-
-  // Get the output tensor
-  auto Y_shape = onnxruntime::TensorShape({size});
-  auto Y = ctx->Output(0, Y_shape);
-
-  switch (output_datatype) {
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<float>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<double>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<int8_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<int16_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<int32_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<int64_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<uint8_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<uint16_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT32: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<uint32_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT64: {
-      ORT_RETURN_IF_ERROR((cosine_sum_window<uint64_t>(Y, size, a0, a1, a2)));
-      break;
-    }
-    default:
-      ORT_THROW("Unsupported input data type of ", output_datatype);
-  }
-
-  return Status::OK();
-}
-
-Status HannWindow::Compute(OpKernelContext* ctx) const {
-  // HannWindows are a special case of Cosine-Sum Windows which take the following form:
-  // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with values the following values for a_k:
-  float a0 = .5f;
-  float a1 = a0;
-  float a2 = 0;
-  return create_cosine_sum_window(ctx, data_type_, a0, a1, a2);
-}
-
-Status HammingWindow::Compute(OpKernelContext* ctx) const {
-  // HammingWindows are a special case of Cosine-Sum Windows which take the following form:
-  // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with values the following values for a_k:
-  float a0 = 25.f / 46.f;
-  float a1 = 1 - a0;
-  float a2 = 0;
-  return create_cosine_sum_window(ctx, data_type_, a0, a1, a2);
-}
-
-Status BlackmanWindow::Compute(OpKernelContext* ctx) const {
-  // BlackmanWindows are a special case of Cosine-Sum Windows which take the following form:
-  // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with values the following values for a_k:
-  float alpha = .16f;
-  float a2 = alpha / 2.f;
-  float a0 = .5f - a2;
-  float a1 = .5f;
-  return create_cosine_sum_window(ctx, data_type_, a0, a1, a2);
-}
-
-static inline double hz_to_mel_scale(double hz) {
-  return 2595 * std::log10(1 + hz / 700);
-}
-
-static inline double mel_scale_to_hz(double mels) {
-  return 700 * (pow(10, (mels / 2595)) - 1);
-}
-
-template <typename T>
-Status create_mel_weight_matrix(OpKernelContext* ctx, int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate, float lower_edge_hertz, float upper_edge_hertz) {
-  // Determine the width of the spectrogram.
-  // This is determined as half the size of the fft size. The first element of the spectrum is always retained,
-  // and the remaining are halved. The second half can be discarded due to the conjugate symmetry of the output with real valued ffts.
-  // Taken together the formula for the size of the output will be std::floor(dft_length / 2) + 1.
-  int64_t num_spectrogram_bins = static_cast<int64_t>(std::floor(dft_length / 2 + 1));
-
-  // Checks
-  auto lowest_index = std::floor(((dft_length + 1) * lower_edge_hertz) / sample_rate);
-  auto highest_index = std::floor(((dft_length + 1) * upper_edge_hertz) / sample_rate);
-  ORT_ENFORCE(lowest_index >= 0 && lowest_index < num_spectrogram_bins, "lower_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the sample_rate.");
-  ORT_ENFORCE(highest_index >= 0 && highest_index < num_spectrogram_bins, "upper_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the sample_rate.");
-
-  // Create the output shape
-  onnxruntime::TensorShape output_shape(
-      {
-          static_cast<int64_t>(num_spectrogram_bins),
-          num_mel_bins
-      });
-  auto* Y = ctx->Output(0, output_shape);
-
-  // Get the raw output data
-  auto* Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
-
-  // Set the weight matrix to 0
-  memset(Y_data, 0, num_spectrogram_bins * num_mel_bins * sizeof(T));
-
-  // The mel filterbank is a triangular shaped peak with a height of 1 and a base equal to the size of the MEL range divided by
-  // the number of bins needed times 2. This triagle is then slid across the mel domain linearly, with a constant step size that
-  // is equal to half of the base of the triange. To accomodate N bins, N+2 data points will be needed to determine the
-  // start, center and end points of each mel triange filter.
-  //
-  // low_frequency where the mel triangle filter banks begin, and they end on the high_frequency_mel
-  // The range is divided evenly to create the needed points corresponding to the begin, center, end points of each triangle filterbank
-  std::vector<size_t> frequency_bins(num_mel_bins + 2);
-  auto low_frequency_mel = hz_to_mel_scale(lower_edge_hertz);
-  auto high_frequency_mel = hz_to_mel_scale(upper_edge_hertz);
-  auto mel_step = (high_frequency_mel - low_frequency_mel) / static_cast<float>(frequency_bins.size());
-
-  // Convert each point from mel scale back to hertz, and then compute the corresponding index in the fft
-  for (size_t i = 0; i < frequency_bins.size(); i++) {
-    auto hz = mel_scale_to_hz(low_frequency_mel + mel_step * i);
-    frequency_bins[i] = static_cast<size_t>(std::floor(((dft_length + 1) * hz) / sample_rate));
-  }
-
-  for (size_t i = 0; i < static_cast<size_t>(num_mel_bins); i++) {
-    auto lower_frequency_value = frequency_bins[i];     //left
-    auto center_frequency_point = frequency_bins[i+1];  //center
-    auto higher_frequency_point = frequency_bins[i+2];  //right
-
-    auto low_to_center = center_frequency_point - lower_frequency_value;
-    if (low_to_center == 0) {
-      auto& current_element = *(Y_data + (center_frequency_point * num_mel_bins) + i);
-      current_element = static_cast<T>(1);
-    } else {
-      for (size_t j = lower_frequency_value; j <= center_frequency_point; j++) {
-        auto& current_element = *(Y_data + (j * num_mel_bins) + i);
-        current_element = static_cast<T>((j - lower_frequency_value) / static_cast<T>(low_to_center));
-      }
-    }
-
-    auto center_to_high = higher_frequency_point - center_frequency_point;
-    if (center_to_high > 0) {
-      for (size_t j = center_frequency_point; j < higher_frequency_point; j++) {
-        auto& current_element = *(Y_data + (j * num_mel_bins) + i);
-        current_element = static_cast<T>((higher_frequency_point - j) / static_cast<T>(center_to_high));
-      }
-    }
-  }
-
-  return Status::OK();
-}
-
-static Status create_mel_weight_matrix(OpKernelContext* ctx, onnx::TensorProto_DataType output_datatype,
-  int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate, float lower_edge_hertz, float upper_edge_hertz) {
-  switch (output_datatype) {
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<float>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<double>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<int8_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<int16_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<int32_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<int64_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<uint8_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<uint16_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT32: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<uint32_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT64: {
-      ORT_RETURN_IF_ERROR((create_mel_weight_matrix<uint64_t>(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)));
-      break;
-    }
-    default:
-      ORT_THROW("Unsupported input data type of ", output_datatype);
-  }
-  return Status::OK();
-}
-
-Status MelWeightMatrix::Compute(OpKernelContext* ctx) const {
-  const auto num_mel_bins = get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(0));
-  const auto dft_length = get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(1));
-  const auto sample_rate = get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(2));
-  const auto lower_edge_hertz = get_scalar_value_from_tensor<float>(ctx->Input<Tensor>(3));
-  const auto upper_edge_hertz = get_scalar_value_from_tensor<float>(ctx->Input<Tensor>(4));
-
-  ORT_RETURN_IF_ERROR(create_mel_weight_matrix(ctx, data_type_, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz));
-  return Status::OK();
-}
-
-}  // namespace contrib
-}  // namespace onnxruntime
-
-#endif
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index 8cf2d278e0ead..fb7bc16cc190a 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -16,7 +16,6 @@
 #include "core/graph/contrib_ops/range_schema_defs.h"
 #include "core/graph/op.h"
 #include "core/mlas/inc/mlas.h"
-#include "core/graph/signal_ops/signal_defs.h"
 #include "core/graph/contrib_ops/onnx_function_util.h"
 #include "onnx/defs/function.h"
 
@@ -370,7 +369,6 @@ void sparseCompatibleMatmulShapeInference(
   updateOutputShape(ctx, 0, resultShape, default_tensor_type);
 }
 
-
 bool ParseScalar(const TensorProto* initializer, int& value) {
   std::vector<int32_t> parsed_data;
   if (initializer->data_type() == TensorProto::INT32) {
@@ -2417,7 +2415,6 @@ void RegisterContribSchemas() {
         // }
         // updateOutputShape(ctx, 0, disentangled_attention_shape);
         propagateShapeFromInputToOutput(ctx, 0, 0);
-
       });
 
   ONNX_CONTRIB_OPERATOR_SCHEMA(Snpe)
@@ -2535,10 +2532,6 @@ This op functions in much the same was as Dropout-11 and Dropout-13 do, execpt t
     RegisterNchwcSchemas();
   }
 #endif
-
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
-  onnxruntime::signal::RegisterSignalSchemas();
-#endif
 }
 
 }  // namespace contrib
diff --git a/onnxruntime/core/graph/signal_ops/signal_defs.cc b/onnxruntime/core/graph/signal_ops/signal_defs.cc
deleted file mode 100644
index 27e077c9fefe4..0000000000000
--- a/onnxruntime/core/graph/signal_ops/signal_defs.cc
+++ /dev/null
@@ -1,738 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
-
-#include "core/framework/tensorprotoutils.h"
-#include "core/providers/common.h"
-#include "core/graph/constants.h"
-#include "core/graph/signal_ops/signal_defs.h"
-#include "core/graph/op.h"
-#include "onnx/defs/schema.h"
-#include "onnx/defs/shape_inference.h"
-#include "onnx/defs/tensor_proto_util.h"
-
-#include <cmath>
-
-namespace onnxruntime {
-namespace signal {
-
-using ONNX_NAMESPACE::AttributeProto;
-using ONNX_NAMESPACE::OpSchema;
-using ONNX_NAMESPACE::OPTIONAL_VALUE;
-
-template <typename T>
-static T get_scalar_value_from_tensor(const ONNX_NAMESPACE::TensorProto* t) {
-  if (t == nullptr) {
-    return T{};
-  }
-
-  auto data_type = t->data_type();
-  switch (data_type) {
-    case ONNX_NAMESPACE::TensorProto::FLOAT:
-      return static_cast<T>(ONNX_NAMESPACE::ParseData<float>(t).at(0));
-    case ONNX_NAMESPACE::TensorProto::DOUBLE:
-      return static_cast<T>(ONNX_NAMESPACE::ParseData<double>(t).at(0));
-    case ONNX_NAMESPACE::TensorProto::INT32:
-      return static_cast<T>(ONNX_NAMESPACE::ParseData<int32_t>(t).at(0));
-    case ONNX_NAMESPACE::TensorProto::INT64:
-      return static_cast<T>(ONNX_NAMESPACE::ParseData<int64_t>(t).at(0));
-    default:
-      ORT_THROW("Unsupported input data type of ", data_type);
-  }
-}
-
-inline const ONNX_NAMESPACE::TensorShapeProto* getOptionalInputShape(ONNX_NAMESPACE::InferenceContext& ctx, size_t n) {
-  const auto* input_type = ctx.getInputType(n);
-
-  if (input_type == nullptr) {
-    return nullptr;
-  }
-
-  const auto value_case = input_type->value_case();
-  if (value_case != ONNX_NAMESPACE::TypeProto::kTensorType && value_case != ONNX_NAMESPACE::TypeProto::kSparseTensorType) {
-    fail_type_inference("Attribute expected to have tensor or sparse tensor type");
-  }
-  if (value_case == ONNX_NAMESPACE::TypeProto::kTensorType) {
-    return &input_type->tensor_type().shape();
-  } else {
-    return &input_type->sparse_tensor_type().shape();
-  }
-}
-
-std::function<void(OpSchema&)> CosineSumWindowOpDocGenerator(const char* name) {
-  return [name](OpSchema& schema) {
-    std::string doc;
-    POPULATE_OP_DOC_STR(
-        doc = R"DOC(
-Generates a {name} window as described in the paper https://ieeexplore.ieee.org/document/1455106.
-)DOC";
-        ReplaceAll(doc, "{name}", name););
-
-    schema.SetDoc(doc);
-    schema.Attr("output_datatype",
-                "The data type of the output tensor. "
-                "Strictly must be one of the values from DataType enum in TensorProto whose values correspond to T2. "
-                "The default value is 1 = FLOAT. ",
-                AttributeProto::INT,
-                static_cast<int64_t>(onnx::TensorProto_DataType::TensorProto_DataType_FLOAT));
-    schema.Attr("periodic",
-                "If 1, returns a window to be used as periodic function. If 0, return a symmetric window. "
-                "When 'periodic' is specified, hann computes a window of length size + 1 and returns the first size points. "
-                "The default value is 1. ",
-                AttributeProto::INT,
-                static_cast<int64_t>(1));
-    schema.Input(0,
-                 "size",
-                 "A scalar value indicating the length of the window.",
-                 "T1",
-                 OpSchema::Single,
-                 true,
-                 1,
-                 OpSchema::NonDifferentiable);
-    schema.Output(0,
-                  "output",
-                  "A Hann window with length: size. "
-                  "The output has the shape: [size].",
-                  "T2",
-                  OpSchema::Single,
-                  true,
-                  1,
-                  OpSchema::NonDifferentiable);
-    schema.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-      // Update the output data type to the output_datatype
-      auto output_datatype = getAttribute(ctx, "output_datatype",
-                                          static_cast<int64_t>(onnx::TensorProto_DataType::TensorProto_DataType_FLOAT));
-      updateOutputElemType(ctx, 0, static_cast<int32_t>(output_datatype));
-
-      if (!hasInputShape(ctx, 0)) {
-        // If no shape is available for the input, skip shape inference.
-        return;
-      }
-
-      const auto* size = ctx.getInputData(0);
-      if (size == nullptr) {
-        // Size is not available, so return early
-        return;
-      }
-
-      if (size->dims_size() != 0) {
-        fail_shape_inference("size input must be a scalar.");
-      }
-
-      auto size_value = get_scalar_value_from_tensor<int64_t>(size);
-      if (size_value <= 0) {
-        fail_shape_inference("size input must be greater than 0.");
-      }
-
-      ONNX_NAMESPACE::TensorShapeProto result_shape;
-      result_shape.add_dim()->set_dim_value(size_value);
-      updateOutputShape(ctx, 0, result_shape);
-    });
-  };
-}
-
-void RegisterSignalSchemas() {
-  MS_SIGNAL_OPERATOR_SCHEMA(DFT)
-      .SetDomain(kMSExperimentalDomain)
-      .SinceVersion(1)
-      .SetDoc(R"DOC(DFT)DOC")
-      .Attr("onesided",
-            "If True (default), only values for half of the fft size are returned because the real-to-complex Fourier transform satisfies the conjugate symmetry."
-            "The output tensor will return the first floor(n_fft/2) + 1 values from the DFT."
-            "Values can be 0 or 1.",
-            AttributeProto::AttributeType::AttributeProto_AttributeType_INT,
-            static_cast<int64_t>(0))
-      .Attr("axis",
-            "The axis on which to perform the DFT. By default this value is set to 0, which corresponds to the first dimension after the batch index."
-            "This value must be less than signal_dimN, where signal_dimN is the number of dimensions in the signal.",
-            AttributeProto::AttributeType::AttributeProto_AttributeType_INT,
-            static_cast<int64_t>(0))
-      .Attr("inverse",
-            "Whether to perform the inverse discrete fourier transform. By default this value is set to 0, which corresponds to false.",
-            AttributeProto::INT,
-            static_cast<int64_t>(0))
-      .Input(0,
-             "input",
-             "For real input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]. "
-             "For complex input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]. "
-             "The first dimension is the batch dimension. "
-             "The following N dimentions correspond to the signal's dimensions. "
-             "The final dimension represents the real and imaginary parts of the value in that order.",
-             "T1",
-             OpSchema::Single,
-             true,
-             1,
-             OpSchema::NonDifferentiable)
-      .Input(1,
-             "dft_length",
-             "The length of the signal."
-             "If greater than the axis dimension, the signal will be zero-padded up to dft_length. "
-             "If less than the axis dimension, only the first dft_length values will be used as the signal. "
-             "It's an optional value. ",
-             "T2",
-             OpSchema::Optional,
-             true,
-             1,
-             OpSchema::NonDifferentiable)
-      .Output(0,
-              "output",
-              "The Fourier Transform of the input vector."
-              "If onesided is 0, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]. "
-              "If axis=0 and onesided is 1, the following shape is expected: [batch_idx][floor(signal_dim1/2)+1][signal_dim2]...[signal_dimN][2]. "
-              "If axis=1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][floor(signal_dim2/2)+1]...[signal_dimN][2]. "
-              "If axis=N-1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[floor(signal_dimN/2)+1][2]. "
-              "The signal_dim at the specified axis is equal to the dft_length.",
-              "T1")
-      .TypeConstraint(
-          "T1",
-          {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
-          "Constrain input and output types to float tensors.")
-      .TypeConstraint(
-          "T2",
-          {"tensor(int32)", "tensor(int64)"},
-          "Constrain scalar length types to int64_t.")
-      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-        bool is_onesided = static_cast<bool>(getAttribute(ctx, "onesided", 0));
-        bool inverse = static_cast<bool>(getAttribute(ctx, "inverse", 0));
-
-        if (inverse && is_onesided) {
-          fail_shape_inference("is_onesided and inverse attributes cannot be enabled at the same time");
-        }
-
-        propagateElemTypeFromInputToOutput(ctx, 0, 0);
-        if (!hasInputShape(ctx, 0)) {
-          // If no shape is available for the input, skip shape inference...
-          return;
-        }
-
-        // In general the output shape will match the input shape exactly
-        // So initialize the output shape with the input shape
-        auto& input_shape = getInputShape(ctx, 0);
-        ONNX_NAMESPACE::TensorShapeProto result_shape_proto = input_shape;
-
-        // Get the axis where the DFT will be performed.
-        auto axis = static_cast<int>(getAttribute(ctx, "axis", 1));
-        auto rank = input_shape.dim_size();
-
-        if (!(-rank <= axis && axis < rank)) {
-          fail_shape_inference(
-              "axis attribute value ",
-              axis,
-              " is invalid for a tensor of rank ",
-              rank);
-        }
-
-        auto axis_idx = (axis >= 0 ? axis : axis + rank);
-
-        // If dft_length is specified, then we should honor the shape.
-        // Set the output dimension to match the dft_length on the axis.
-        // If onesided this will be adjusted later on...
-        const ONNX_NAMESPACE::TensorProto* dft_length = nullptr;
-        if (ctx.getNumInputs() >= 2 && ctx.getInputType(1) != nullptr) {
-          dft_length = ctx.getInputData(1);
-          if (dft_length == nullptr) {
-            // If we cannot read the dft_length, we cannot infer shape
-            // return...
-            return;
-          }
-        }
-
-        if (nullptr != dft_length) {
-          if (dft_length->dims_size() != 0) {
-            fail_shape_inference("dft_length input must be a scalar.");
-          }
-          auto dft_length_value = get_scalar_value_from_tensor<int64_t>(dft_length);
-          result_shape_proto.mutable_dim(axis_idx)->set_dim_value(dft_length_value);
-        }
-        // When DFT is onesided, the output shape is half the size of the input shape
-        // along the specified axis.
-        if (is_onesided) {
-          auto axis_dimension = result_shape_proto.dim(axis_idx);
-          // We need to update the output shape dimension along the specified axis,
-          // but sometimes the dimension will be a free dimension or be otherwise unset.
-          // Only perform inference when a input dimension value exists.
-          if (axis_dimension.has_dim_value()) {
-            auto original_signal_size = axis_dimension.dim_value();
-            auto half_signal_size = (original_signal_size >> 1) + 1;
-            result_shape_proto.mutable_dim(axis_idx)->set_dim_value(half_signal_size);
-          } else {
-            // Clear the value and param (which would otherwie be inherited from the input).
-            result_shape_proto.mutable_dim(axis_idx)->clear_dim_value();
-            result_shape_proto.mutable_dim(axis_idx)->clear_dim_param();
-          }
-        }
-
-        // Coerce the last dimension to 2.
-        auto dim_size = static_cast<int64_t>(result_shape_proto.dim_size());
-        auto has_component_dimension = dim_size > 2;
-
-        // This if check is retained in the contrib op and not the official spec for back compat
-        if (has_component_dimension) {
-          result_shape_proto.mutable_dim(static_cast<int>(dim_size - 1))->set_dim_value(2);
-        } else {
-          result_shape_proto.add_dim()->set_dim_value(2);
-        }
-
-        updateOutputShape(ctx, 0, result_shape_proto);
-      });
-
-  MS_SIGNAL_OPERATOR_SCHEMA(IDFT)
-      .SetDomain(kMSExperimentalDomain)
-      .SinceVersion(1)
-      .SetDoc(R"DOC(IDFT)DOC")
-      .Attr("axis",
-            "The axis on which to perform the DFT. By default this value is set to 0, which corresponds to the first dimension after the batch index."
-            "This value must be less than signal_dimN, where signal_dimN is the number of dimensions in the signal.",
-            AttributeProto::AttributeType::AttributeProto_AttributeType_INT,
-            static_cast<int64_t>(0))
-      .Input(0,
-             "input",
-             "For real multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]."
-             "For complex multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]."
-             "The first dimension is the batch dimension."
-             "The final dimension represents the real and imaginary parts of the value.",
-             "T1")
-      .Input(1,
-             "dft_length",
-             "The length of the signal."
-             "If greater than the axis dimension, the signal will be zero-padded up to dft_length. "
-             "If less than the axis dimension, only the first dft_length values will be used as the signal. "
-             "It's an optional value. ",
-             "T2",
-             OpSchema::Optional,
-             true,
-             1,
-             OpSchema::NonDifferentiable)
-      .Output(0,
-              "output",
-              "The inverse discrete Fourier transform of the input. "
-              "The signal_dim at the specified axis is equal to the dft_length."
-              "The expected shape is [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]"
-              "For all types of input, the last dimension of the output represents the components of a complex number.",
-              "T1",
-              OpSchema::Single,
-              true,
-              1,
-              OpSchema::NonDifferentiable)
-      .TypeConstraint(
-          "T1",
-          {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
-          "Constrain input and output types to float tensors.")
-      .TypeConstraint(
-          "T2",
-          {"tensor(int64)"},
-          "Constrain scalar length types to int64_t.")
-      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-        propagateElemTypeFromInputToOutput(ctx, 0, 0);
-        const int64_t batch_ndim = 1;
-
-        auto& input_shape = getInputShape(ctx, 0);
-        ONNX_NAMESPACE::TensorShapeProto result_shape = input_shape;
-        auto dim_size = static_cast<int64_t>(input_shape.dim_size());
-        auto has_component_dimension = dim_size > 2;
-
-        if (has_component_dimension) {
-          result_shape.mutable_dim(static_cast<int>(dim_size - 1))->set_dim_value(2);
-        } else {
-          result_shape.add_dim()->set_dim_value(2);
-        }
-
-        updateOutputShape(ctx, 0, result_shape);
-      });
-
-  MS_SIGNAL_OPERATOR_SCHEMA(STFT)
-      .SetDomain(kMSExperimentalDomain)
-      .SinceVersion(1)
-      .SetDoc(R"DOC(STFT)DOC")
-      .Attr(
-          "onesided",
-          "If onesided is 1, only values for w in [0, 1, 2, ..., floor(n_fft/2) + 1] are returned because "
-          "the real-to-complex Fourier transform satisfies the conjugate symmetry, i.e., X[m, w] = X[m,w] = "
-          "X[m,n_fft-w]*. Note if the input or window tensors are complex, then onesided output is not possible. "
-          "Enabling onesided with real inputs performs a Real-valued fast Fourier transform (RFFT)."
-          "When invoked with real or complex valued input, the default value is 1. "
-          "Values can be 0 or 1.",
-          AttributeProto::INT,
-          static_cast<int64_t>(1))
-      .Input(0,
-             "signal",
-             "Input tensor representing a real or complex valued signal. "
-             "For real input, the following shape is expected: [batch_size][signal_length][1]. "
-             "For complex input, the following shape is expected: [batch_size][signal_length][2], where "
-             "[batch_size][signal_length][0] represents the real component and [batch_size][signal_length][1] "
-             "represents the imaginary component of the signal.",
-             "T1",
-             OpSchema::Single,
-             true,
-             1,
-             OpSchema::NonDifferentiable)
-      .Input(1,
-             "frame_step",
-             "The number of samples to step between successive DFTs.",
-             "T2",
-             OpSchema::Single,
-             true,
-             1,
-             OpSchema::NonDifferentiable)
-      .Input(2,
-             "window",
-             "A tensor representing the window that will be slid over the signal."
-             "The window must have rank 1 with shape: [window_shape]. "
-             "It's an optional value. ",
-             "T1",
-             OpSchema::Optional,
-             true,
-             1,
-             OpSchema::NonDifferentiable)
-      .Input(3,
-             "frame_length",
-             "A scalar representing the size of the DFT. "
-             "It's an optional value.",
-             "T2",
-             OpSchema::Optional,
-             true,
-             1,
-             OpSchema::NonDifferentiable)
-      .Output(0,
-              "output",
-              "The Short-time Fourier Transform of the signals."
-              "If onesided is 1, the output has the shape: [batch_size][frames][dft_unique_bins][2], where "
-              "dft_unique_bins is frame_length // 2 + 1 (the unique components of the DFT) "
-              "If onesided is 0, the output has the shape: [batch_size][frames][frame_length][2], where frame_length "
-              "is the length of the DFT.",
-              "T1",
-              OpSchema::Single,
-              true,
-              1,
-              OpSchema::NonDifferentiable)
-      .TypeConstraint(
-          "T1",
-          {"tensor(float)",
-           "tensor(float16)",
-           "tensor(double)",
-           "tensor(bfloat16)"},
-          "Constrain signal and output to float tensors.")
-      .TypeConstraint(
-          "T2",
-          {"tensor(int32)", "tensor(int64)"},
-          "Constrain scalar length types to int64_t.")
-      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-        propagateElemTypeFromInputToOutput(ctx, 0, 0);
-
-        // Get signal size
-        // The signal size is needed to perform inference because the size of the signal
-        // is needed to compute the number of DFTs in the output.
-        //
-        // 1) Check if shape exists, return if not
-        // 2) Get the shape
-        // 3) Check if signal dim value exists, return if not
-        if (!hasInputShape(ctx, 0)) {
-          return;
-        }
-
-        auto& input_shape = getInputShape(ctx, 0);
-        auto signal_dim = input_shape.dim(1);
-        if (!signal_dim.has_dim_value()) {
-          return;
-        }
-        auto signal_size = signal_dim.dim_value();
-
-        // The frame step is a required input.
-        // Its value is needed to compute the number output nDFTs, so return early is missing.
-        const auto* frame_step = ctx.getInputData(1);
-        if (nullptr == frame_step) {
-          return;
-        }
-        auto frame_step_value = get_scalar_value_from_tensor<int64_t>(frame_step);
-
-        // Determine the size of the DFT based on the 2 optional inputs window and frame_length.
-        // One must be set.
-        int64_t dft_size = -1;
-        const ONNX_NAMESPACE::TensorProto* frame_length = nullptr;
-        if (ctx.getNumInputs() >= 4 && ctx.getInputType(3) != nullptr) {
-          frame_length = ctx.getInputData(3);
-          if (frame_length == nullptr) {
-            // If we cannot read the frame_length, we cannot infer shape
-            // return...
-            return;
-          }
-        }
-
-        const ONNX_NAMESPACE::TensorShapeProto* window_shape = nullptr;
-        if (ctx.getNumInputs() >= 3) {
-          window_shape = getOptionalInputShape(ctx, 2);
-        } else {
-          window_shape = nullptr;
-        }
-
-        if (window_shape == nullptr && frame_length == nullptr) {
-          // STFT expects to have at least one of these inputs set: [window, frame_length],
-          // but they may not be available at shape inference time
-          return;
-        } else if (window_shape != nullptr && frame_length != nullptr) {
-          if (frame_length->dims_size() != 0) {
-            fail_shape_inference("frame_length input must be scalar.");
-          }
-          auto frame_length_value = get_scalar_value_from_tensor<int64_t>(frame_length);
-
-          // Ensure that the window length and the dft_length match.
-          if (window_shape->dim_size() != 1) {
-            fail_shape_inference("window input must have rank = 1.");
-          }
-          if (window_shape->dim(0).has_dim_value()) {
-            auto window_length = window_shape->dim(0).dim_value();
-            if (window_length != frame_length_value) {
-              fail_type_inference(
-                  "If STFT has both a window input and frame_length specified, the dimension of the "
-                  "window must match the frame_length specified!");
-            }
-          }
-
-          dft_size = frame_length_value;
-        } else if (window_shape != nullptr) {
-          // Ensure that the window length and the dft_length match.
-          if (window_shape->dim_size() != 1) {
-            fail_shape_inference("window input must have rank = 1.");
-          }
-          if (window_shape->dim(0).has_dim_value()) {
-            dft_size = window_shape->dim(0).dim_value();
-          } else {
-            // Cannot determine the window size, and there is no frame_length,
-            // So shape inference cannot proceed.
-            return;
-          }
-        } else if (frame_length != nullptr) {
-          if (frame_length->dims_size() != 0) {
-            fail_shape_inference("frame_length input must be scalar.");
-          }
-          dft_size = get_scalar_value_from_tensor<int64_t>(frame_length);
-        }
-
-        bool is_onesided = static_cast<bool>(getAttribute(ctx, "onesided", 0));
-        if (is_onesided) {
-          dft_size = is_onesided ? ((dft_size >> 1) + 1) : dft_size;
-        }
-
-        auto n_dfts = static_cast<int64_t>((signal_size - dft_size) / static_cast<float>(frame_step_value)) + 1;
-
-        // The output has the following shape: [batch_size][frames][dft_unique_bins][2]
-        ONNX_NAMESPACE::TensorShapeProto result_shape_proto;
-        result_shape_proto.add_dim()->set_dim_value(input_shape.dim(0).dim_value());  // batch size
-        result_shape_proto.add_dim()->set_dim_value(n_dfts);
-        result_shape_proto.add_dim()->set_dim_value(dft_size);
-        result_shape_proto.add_dim()->set_dim_value(2);
-        updateOutputShape(ctx, 0, result_shape_proto);
-      });
-
-  // Window Functions
-  MS_SIGNAL_OPERATOR_SCHEMA(HannWindow)
-      .SetDomain(kMSExperimentalDomain)
-      .SinceVersion(1)
-      .FillUsing(CosineSumWindowOpDocGenerator("Hann"))
-      .TypeConstraint(
-          "T1",
-          {"tensor(int32)", "tensor(int64)"},
-          "Constrain the input size to int64_t.")
-      .TypeConstraint(
-          "T2",
-          ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(),
-          "Constrain output types to numeric tensors.")
-      .FunctionBody(R"ONNX(
-        {
-          A0 = Constant <value = float {0.5}>()
-          A1 = Constant <value = float {0.5}>()
-          A2 = Constant <value = float {0.0}>()
-          Zero = Constant <value = float {0.0}>()
-          One = Constant <value = float {1.0}>()
-          Two = Constant <value = float {2.0}>()
-          Tau = Constant <value = float {6.2831853}>()
-          Size_FP = Cast <to = 1> (size)
-          AngularIncrement = Div (Tau, Size_FP)
-          Range = Range (Zero, Size_FP, One)
-          RangeAngular = Mul (Range, AngularIncrement)
-          TwoRangeAngular = Mul (RangeAngular, Two)
-          CosTwoRangeAngular = Cos (TwoRangeAngular)
-          A2_Component = Mul (A2, CosTwoRangeAngular)
-          CosRangeAngular = Cos (RangeAngular)
-          A1_Component = Mul (A1, CosRangeAngular)
-          Temp0 = Add (A1_Component, A2_Component)
-          Temp1 = Sub (A0, Temp0)
-          output = Cast <to : int = @output_datatype> (Temp1)
-        }
-        )ONNX");
-
-  MS_SIGNAL_OPERATOR_SCHEMA(HammingWindow)
-      .SetDomain(kMSExperimentalDomain)
-      .SinceVersion(1)
-      .FillUsing(CosineSumWindowOpDocGenerator("Hamming"))
-      .TypeConstraint(
-          "T1",
-          {"tensor(int32)", "tensor(int64)"},
-          "Constrain the input size to int64_t.")
-      .TypeConstraint(
-          "T2",
-          ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(),
-          "Constrain output types to numeric tensors.")
-      .FunctionBody(R"ONNX(
-        {
-          A0 = Constant <value = float {0.54347826087}>()
-          A1 = Constant <value = float {0.45652173913}>()
-          A2 = Constant <value = float {0.0}>()
-          Zero = Constant <value = float {0.0}>()
-          One = Constant <value = float {1.0}>()
-          Two = Constant <value = float {2.0}>()
-          Tau = Constant <value = float {6.2831853}>()
-          Size_FP = Cast <to = 1> (size)
-          AngularIncrement = Div (Tau, Size_FP)
-          Range = Range (Zero, Size_FP, One)
-          RangeAngular = Mul (Range, AngularIncrement)
-          TwoRangeAngular = Mul (RangeAngular, Two)
-          CosTwoRangeAngular = Cos (TwoRangeAngular)
-          A2_Component = Mul (A2, CosTwoRangeAngular)
-          CosRangeAngular = Cos (RangeAngular)
-          A1_Component = Mul (A1, CosRangeAngular)
-          Temp0 = Add (A1_Component, A2_Component)
-          Temp1 = Sub (A0, Temp0)
-          output = Cast <to : int = @output_datatype> (Temp1)
-        }
-        )ONNX");
-
-  MS_SIGNAL_OPERATOR_SCHEMA(BlackmanWindow)
-      .SetDomain(kMSExperimentalDomain)
-      .SinceVersion(1)
-      .FillUsing(CosineSumWindowOpDocGenerator("Blackman"))
-      .TypeConstraint(
-          "T1",
-          {"tensor(int32)", "tensor(int64)"},
-          "Constrain the input size to int64_t.")
-      .TypeConstraint(
-          "T2",
-          ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(),
-          "Constrain output types to numeric tensors.")
-      .FunctionBody(R"ONNX(
-        {
-          A0 = Constant <value = float {0.42}>()
-          A1 = Constant <value = float {0.5}>()
-          A2 = Constant <value = float {0.08}>()
-          Zero = Constant <value = float {0.0}>()
-          One = Constant <value = float {1.0}>()
-          Two = Constant <value = float {2.0}>()
-          Tau = Constant <value = float {6.2831853}>()
-          Size_FP = Cast <to = 1> (size)
-          AngularIncrement = Div (Tau, Size_FP)
-          Range = Range (Zero, Size_FP, One)
-          RangeAngular = Mul (Range, AngularIncrement)
-          TwoRangeAngular = Mul (RangeAngular, Two)
-          CosTwoRangeAngular = Cos (TwoRangeAngular)
-          A2_Component = Mul (A2, CosTwoRangeAngular)
-          CosRangeAngular = Cos (RangeAngular)
-          A1_Component = Mul (A1, CosRangeAngular)
-          Temp0 = Add (A1_Component, A2_Component)
-          Temp1 = Sub (A0, Temp0)
-          output = Cast <to : int = @output_datatype> (Temp1)
-        }
-        )ONNX");
-
-  static const char* MelWeightMatrix_ver17_doc = R"DOC(
-Generate a MelWeightMatrix that can be used to re-weight a Tensor containing a linearly sampled frequency spectra
-(from DFT or STFT) into num_mel_bins frequency information based on the [lower_edge_hertz, upper_edge_hertz] range
-on the mel scale.
-This function defines the mel scale in terms of a frequency in hertz according to the following formula:
-
-    mel(f) = 2595 * log10(1 + f/700)
-
-In the returned matrix, all the triangles (filterbanks) have a peak value of 1.0.
-
-The returned MelWeightMatrix can be used to right-multiply a spectrogram S of shape [frames, num_spectrogram_bins] of
-linear scale spectrum values (e.g. STFT magnitudes) to generate a "mel spectrogram" M of shape [frames, num_mel_bins].
-)DOC";
-
-  MS_SIGNAL_OPERATOR_SCHEMA(MelWeightMatrix)
-      .SetDomain(kMSExperimentalDomain)
-      .SinceVersion(1)
-      .SetDoc(R"DOC(MelWeightMatrix)DOC")
-      .Attr("output_datatype",
-            "The data type of the output tensor. "
-            "Strictly must be one of the types from DataType enum in TensorProto.",
-            ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_INT,
-            static_cast<int64_t>(onnx::TensorProto_DataType::TensorProto_DataType_FLOAT))
-      .Input(0,
-             "num_mel_bins",
-             "The number of bands in the mel spectrum.",
-             "T1")
-      .Input(1,
-             "dft_length",
-             "The size of the FFT.",
-             "T1")
-      .Input(2,
-             "sample_rate",
-             "",
-             "T1")
-      .Input(3,
-             "lower_edge_hertz",
-             "",
-             "T2")
-      .Input(4,
-             "upper_edge_hertz",
-             "",
-             "T2")
-      .Output(0,
-              "output",
-              "The MEL Matrix",
-              "T3")
-      .TypeConstraint(
-          "T1",
-          {"tensor(int32)", "tensor(int64)"},
-          "Constrain to integer tensors.")
-      .TypeConstraint(
-          "T2",
-          {"tensor(float)",
-           "tensor(float16)",
-           "tensor(double)",
-           "tensor(bfloat16)"},
-          "Constrain to float tensors")
-      .TypeConstraint(
-          "T3",
-          ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(),
-          "Constrain to any numerical types.")
-      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-        auto output_datatype = getAttribute(
-            ctx, "output_datatype", static_cast<int64_t>(onnx::TensorProto::DataType::TensorProto_DataType_FLOAT));
-        updateOutputElemType(ctx, 0, static_cast<int32_t>(output_datatype));
-
-        if (!hasInputShape(ctx, 0) || !hasInputShape(ctx, 1)) {
-          return;
-        }
-
-        const auto* num_mel_bins = ctx.getInputData(0);
-        const auto* dft_length = ctx.getInputData(1);
-        if (nullptr == num_mel_bins || nullptr == dft_length) {
-          return;
-        }
-
-        int64_t num_mel_bins_value = -1;
-        int64_t dft_length_value = -1;
-        if (num_mel_bins->dims_size() != 0) {
-          fail_shape_inference("num_mel_bins input must be scalar.");
-        }
-        num_mel_bins_value = get_scalar_value_from_tensor<int64_t>(num_mel_bins);
-
-        if (dft_length->dims_size() != 0) {
-          fail_shape_inference("dft_length input must be scalar.");
-        }
-        dft_length_value = get_scalar_value_from_tensor<int64_t>(dft_length);
-
-        if (num_mel_bins_value > 0 && dft_length_value > 0) {
-          ONNX_NAMESPACE::TensorShapeProto result_shape;
-          result_shape.add_dim()->set_dim_value(static_cast<int64_t>((dft_length_value >> 1) + 1));
-          result_shape.add_dim()->set_dim_value(num_mel_bins_value);
-          updateOutputShape(ctx, 0, result_shape);
-        }
-      });
-}
-
-}  // namespace signal
-}  // namespace onnxruntime
-
-#endif
diff --git a/onnxruntime/core/graph/signal_ops/signal_defs.h b/onnxruntime/core/graph/signal_ops/signal_defs.h
deleted file mode 100644
index 6960ff33f6e61..0000000000000
--- a/onnxruntime/core/graph/signal_ops/signal_defs.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#pragma once
-
-#if !defined(ORT_MINIMAL_BUILD)
-#include "onnx/defs/schema.h"
-#else
-#include "onnx/defs/data_type_utils.h"
-#endif
-#include "onnx/onnx_pb.h"
-#include "onnx/onnx-operators_pb.h"
-
-namespace onnxruntime {
-namespace signal {
-#define MS_SIGNAL_OPERATOR_SCHEMA(name) \
-  MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name)
-#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \
-  MS_SIGNAL_OPERATOR_SCHEMA_UNIQ(Counter, name)
-#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ(Counter, name)         \
-  static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \
-      op_schema_register_once##name##Counter) ONNX_UNUSED =      \
-      ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)
-
-#define MS_SIGNAL_OPERATOR_SCHEMA_ELSEWHERE(name, schema_func) \
-  MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(__COUNTER__, name, schema_func)
-#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(Counter, name, schema_func) \
-  MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func)
-#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \
-  static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce(                \
-      op_schema_register_once##name##Counter) ONNX_UNUSED =                     \
-      schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__))
-
-void RegisterSignalSchemas();
-}  // namespace dml
-}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index 62e1d1f73f353..191f34439c7bf 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -751,6 +751,14 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, int32_t, LessOrEqual);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, int64_t, LessOrEqual);
 
+// Opset 17
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, DFT);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, BlackmanWindow);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, HammingWindow);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, HannWindow);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, MelWeightMatrix);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, STFT);
+
 // !!PLEASE READ BELOW!! Following that, add new entries above this comment
 
 /*  *** IMPORTANT! ***
@@ -1953,6 +1961,14 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
                                                                 LessOrEqual)>,
     BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, int64_t,
                                                                 LessOrEqual)>,
+
+    // Opset 17
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, DFT)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, BlackmanWindow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, HammingWindow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, HannWindow)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, MelWeightMatrix)>,
+    BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, STFT)>,
   };
 
   for (auto& function_table_entry : function_table) {
diff --git a/onnxruntime/core/providers/cpu/signal/dft.cc b/onnxruntime/core/providers/cpu/signal/dft.cc
new file mode 100644
index 0000000000000..97d7e19a7c4b1
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/signal/dft.cc
@@ -0,0 +1,508 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/cpu/signal/dft.h"
+
+#include <cmath>
+#include <complex>
+#include <functional>
+#include <limits>
+#include <vector>
+
+#include "core/framework/op_kernel.h"
+#include "core/platform/threadpool.h"
+#include "core/providers/common.h"
+#include "core/providers/cpu/signal/utils.h"
+#include "core/util/math_cpuonly.h"
+#include "Eigen/src/Core/Map.h"
+
+namespace onnxruntime {
+
+ONNX_CPU_OPERATOR_KERNEL(DFT, 17,  // CPU kernel registration for DFT (listed under "Opset 17" in the registry)
+                         KernelDefBuilder()
+                             .TypeConstraint("T1", BuildKernelDefConstraints<float, double>())
+                             .TypeConstraint("T2", BuildKernelDefConstraints<int32_t, int64_t>()),
+                         DFT);
+
+ONNX_CPU_OPERATOR_KERNEL(STFT, 17,  // CPU kernel registration for STFT (listed under "Opset 17" in the registry)
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)  // NOTE(review): confirm output 0 may really reuse input 0's buffer
+                             .TypeConstraint("T1", BuildKernelDefConstraints<float, double>())
+                             .TypeConstraint("T2", BuildKernelDefConstraints<int32_t, int64_t>()),
+                         STFT);
+
+static bool is_real_valued_signal(const onnxruntime::TensorShape& shape) {  // rank 2, or last dimension == 1
+  return shape.NumDimensions() == 2 || shape[shape.NumDimensions() - 1] == 1;
+}
+
+static bool is_complex_valued_signal(const onnxruntime::TensorShape& shape) {  // rank > 2 and last dimension == 2
+  return shape.NumDimensions() > 2 && shape[shape.NumDimensions() - 1] == 2;
+}
+
+static bool is_power_of_2(size_t size) {
+  // A power of two has exactly one set bit, so clearing the lowest set bit
+  // (size & (size - 1)) must leave zero. Zero itself has no set bit and is
+  // therefore rejected.
+  const bool has_any_bit = size != 0;
+  const bool lowest_bit_is_only_bit = (size & (size - 1)) == 0;
+  return has_any_bit && lowest_bit_is_only_bit;
+}
+
+static const unsigned char BitReverseTable256[] = {  // entry b holds byte b with its 8 bits in reverse order
+    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48,
+    0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4,
+    0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C,
+    0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2,
+    0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A,
+    0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E,
+    0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21,
+    0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+    0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55,
+    0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD,
+    0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B,
+    0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
+    0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F,
+    0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF};
+
+template <typename T>
+static inline T bit_reverse(T num, unsigned significant_bits) {  // reverses the low significant_bits bits of num
+  if (significant_bits > 32) {
+    ORT_THROW("Unsupported bit size.");
+  }
+  const uint32_t n = static_cast<uint32_t>(num);  // only the low 32 bits of num take part in the reversal
+  const auto rev8 = [n](unsigned s) -> uint32_t { return BitReverseTable256[(n >> s) & 0xff]; };  // widened byte
+  const uint32_t rev = (rev8(0) << 24) | (rev8(8) << 16) | (rev8(16) << 8) | rev8(24);  // unsigned shifts only
+  return static_cast<T>(static_cast<uint64_t>(rev) >> (32 - significant_bits));  // 64-bit: a shift by 32 is valid
+}
+
+template <typename T>
+static T compute_angular_velocity(size_t number_of_samples, bool inverse) {
+  // Fundamental angular step (radians per index) of a DFT of length number_of_samples:
+  // +tau/N for the inverse transform, -tau/N for the forward one. number_of_samples must be non-zero.
+  // tau is spelled with full double precision so that T = double is not limited to ~9 significant digits.
+  static const T tau = static_cast<T>(6.283185307179586476925286766559);
+  const T direction = inverse ? T(1) : T(-1);
+  return direction * tau / static_cast<T>(number_of_samples);
+}
+
+template <typename T>
+static std::complex<T> compute_exponential(size_t index, const T angular_velocity) {
+  const T phase = angular_velocity * static_cast<T>(index);  // angle reached after `index` angular steps
+  return std::complex<T>(cos(phase), sin(phase));            // cos(phase) + i * sin(phase)
+}
+
+template <typename T, typename U>
+static Status fft_radix2(OpKernelContext* /*ctx*/, const Tensor* X, Tensor* Y, size_t X_offset, size_t X_stride,
+                         size_t Y_offset, size_t Y_stride, int64_t axis, size_t dft_length, const Tensor* window,
+                         bool is_onesided, bool inverse, InlinedVector<std::complex<T>>& V,
+                         InlinedVector<std::complex<T>>& temp_output) {
+  // Radix-2 FFT of one strided 1-D signal. The caller only dispatches here when dft_length is a power of two.
+  const auto& X_shape = X->Shape();
+  size_t number_of_samples = static_cast<size_t>(X_shape[axis]);
+  unsigned significant_bits = static_cast<unsigned>(log2(dft_length));  // bit width of a sample index
+
+  // Input samples, read as U and addressed in units of U starting at X_offset
+  auto* X_data = const_cast<U*>(reinterpret_cast<const U*>(X->DataRaw())) + X_offset;
+  // Optional window multiplied into the input; NOTE(review): assumed to hold at least dft_length values
+  U* window_data = nullptr;
+  if (window) {
+    window_data = const_cast<U*>(reinterpret_cast<const U*>(window->DataRaw()));
+  }
+
+  size_t Y_data_stride = 1;
+  std::complex<T>* Y_data;
+  if (is_onesided) {  // one-sided: run the full transform in contiguous scratch, copy the kept bins at the end
+    if (temp_output.size() != dft_length) {
+      temp_output.resize(dft_length);
+    }
+    Y_data = temp_output.data();
+  } else {  // two-sided: transform directly inside Y, using Y's stride
+    Y_data = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
+    Y_data_stride = Y_stride;
+  }
+
+  auto angular_velocity = compute_angular_velocity<T>(dft_length, inverse);
+
+  // Twiddle factors stored at bit-reversed positions; V is rebuilt only when its size differs from dft_length
+  if (V.size() != dft_length) {
+    V.resize(dft_length);
+    for (size_t i = 0; i < dft_length; i++) {
+      size_t bit_reversed_index = bit_reverse(i, significant_bits);
+      V[bit_reversed_index] = compute_exponential(i, angular_velocity);
+    }
+  }
+
+  for (size_t i = 0; i < dft_length; i++) {  // load the input in bit-reversed order, zero-padded and windowed
+    size_t bit_reversed_index = bit_reverse(i, significant_bits);
+    auto x = (bit_reversed_index < number_of_samples) ? *(X_data + bit_reversed_index * X_stride) : 0;
+    auto window_element = window_data ? *(window_data + bit_reversed_index) : 1;
+    *(Y_data + i * Y_data_stride) = std::complex<T>(1, 0) * x * window_element;
+  }
+
+  // Butterfly passes: i is the sub-transform size, doubling each pass until it reaches dft_length
+  unsigned current_significant_bits = 0;
+  for (size_t i = 2; i <= dft_length; i <<= 1) {
+    size_t midpoint = i >> 1;
+    current_significant_bits++;
+
+    for (size_t k = 0; k < midpoint; k++) {
+      auto first_idx = bit_reverse(k, current_significant_bits);  // positions in V of the two twiddles for k
+      auto second_idx = bit_reverse(midpoint + k, current_significant_bits);
+      for (size_t j = 0; j < dft_length; j += i) {  // same butterfly in every sub-transform of size i
+        auto even_index = k + j;
+        auto odd_index = k + j + midpoint;
+        std::complex<T>* even = (Y_data + even_index * Y_data_stride);
+        std::complex<T>* odd = (Y_data + odd_index * Y_data_stride);
+        std::complex<T> first = *even + (V[first_idx] * *odd);
+        std::complex<T> second = *even + (V[second_idx] * *odd);
+        *even = first;
+        *odd = second;
+      }
+    }
+  }
+
+  // The inverse transform is normalized by 1 / dft_length
+  if (inverse) {
+    for (size_t i = 0; i < dft_length; i++) {
+      std::complex<T>& val = *(Y_data + i * Y_data_stride);
+      val /= static_cast<T>(dft_length);
+    }
+  }
+
+  if (is_onesided) {  // copy the first dft_length / 2 + 1 bins from scratch into Y
+    const size_t output_size = (dft_length >> 1) + 1;
+    auto destination = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
+    for (size_t i = 0; i < output_size; i++) {
+      *(destination + Y_stride * i) = *(Y_data + i * Y_data_stride);
+    }
+  }
+
+  return Status::OK();
+}
+
+template <typename T, typename U>
+static Status dft_naive(const Tensor* X, Tensor* Y, size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride,
+                        int64_t axis, size_t dft_length, const Tensor* window, bool inverse) {
+  // Direct O(n^2) DFT of one strided signal: Y's extent along axis sets the bin count, missing samples read as 0.
+  const auto& X_shape = X->Shape();
+  const size_t number_of_samples = static_cast<size_t>(X_shape[axis]);
+  const auto& Y_shape = Y->Shape();
+  const size_t dft_output_size = static_cast<size_t>(Y_shape[axis]);
+
+  // Offsets and strides are in elements: U for the input, std::complex<T> for the output.
+  auto* X_data = const_cast<U*>(reinterpret_cast<const U*>(X->DataRaw())) + X_offset;
+  auto* Y_data = reinterpret_cast<std::complex<T>*>(Y->MutableDataRaw()) + Y_offset;
+
+  U* window_data = nullptr;  // NOTE(review): the window is assumed to hold at least dft_length values
+  if (window) {
+    window_data = const_cast<U*>(reinterpret_cast<const U*>(window->DataRaw()));
+  }
+
+  auto angular_velocity = compute_angular_velocity<T>(dft_length, inverse);
+
+  for (size_t i = 0; i < dft_output_size; i++) {
+    std::complex<T>& out = *(Y_data + i * Y_stride);
+    out = std::complex<T>(0, 0);
+    for (size_t j = 0; j < dft_length; j++) {  // vectorize over this loop
+      // The twiddle factor is periodic in dft_length, so reduce i * j before it is converted to T: a large
+      // product does not fit T's mantissa (float in particular) and would otherwise yield a wrong phase.
+      auto exponential = compute_exponential((i * j) % dft_length, angular_velocity);
+      auto window_element = window_data ? *(window_data + j) : 1;
+      auto x = (j < number_of_samples) ? *(X_data + j * X_stride) : 0;
+      auto element = x * window_element;
+      out += exponential * element;
+    }
+
+    if (inverse) {
+      out /= static_cast<T>(dft_length);  // normalize the inverse transform
+    }
+  }
+
+  return Status::OK();
+}
+
+template <typename T, typename U>
+static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis,
+                                         int64_t dft_length, const Tensor* window, bool is_onesided, bool inverse,
+                                         InlinedVector<std::complex<T>>& V,
+                                         InlinedVector<std::complex<T>>& temp_output) {
+  // Runs one 1-D transform along `axis` for every combination of the remaining batch/signal indices of X.
+  // T is the scalar type of the complex output; U is the element type the input buffer is read as.
+  // V and temp_output are scratch buffers that are only forwarded to fft_radix2.
+  // Errors from the per-signal transforms are propagated through ORT_RETURN_IF_ERROR.
+  // NOTE(review): Y's offsets are derived from X's extents, so Y is assumed to match X off-axis - confirm.
+  const auto& X_shape = X->Shape();
+  const auto& Y_shape = Y->Shape();
+
+  auto batch_and_signal_rank = X_shape.NumDimensions();
+  auto total_dfts = static_cast<size_t>(X_shape.Size() / X_shape[axis]);
+
+  // Shape extents count scalars; divide by this to address X in elements (1 scalar if real, otherwise 2).
+  const auto complex_input_factor = is_real_valued_signal(X_shape) ? 1 : 2;
+  if (X_shape.NumDimensions() > 2) {
+    // The trailing component dimension is part of each element, not something to iterate over.
+    total_dfts /= X_shape[X_shape.NumDimensions() - 1];
+    batch_and_signal_rank -= 1;
+  }
+
+  // Loop-invariant: element strides between consecutive samples along `axis`, and the algorithm choice.
+  const size_t X_stride = X_shape.SizeFromDimension(axis + 1) / complex_input_factor;
+  const size_t Y_stride = Y_shape.SizeFromDimension(axis + 1) / 2;
+  const bool use_radix2 = is_power_of_2(dft_length);
+
+  for (size_t i = 0; i < total_dfts; i++) {
+    // Split the flat transform index i into one index per non-axis dimension (dimension 0 most significant)
+    // and accumulate the matching element offsets into X and Y in a single pass.
+    size_t X_offset = 0;
+    size_t Y_offset = 0;
+    size_t cumulative_packed_stride = total_dfts;
+    size_t temp = i;
+    for (size_t r = 0; r < batch_and_signal_rank; r++) {
+      if (r == static_cast<size_t>(axis)) {
+        continue;
+      }
+      cumulative_packed_stride /= X_shape[r];
+      auto index = temp / cumulative_packed_stride;
+      temp -= (index * cumulative_packed_stride);
+      X_offset += index * X_shape.SizeFromDimension(r + 1) / complex_input_factor;
+      Y_offset += index * Y_shape.SizeFromDimension(r + 1) / 2;
+    }
+
+    // Power-of-two lengths take the radix-2 FFT; every other length falls back to the direct DFT.
+    // NOTE(review): the direct DFT is not passed is_onesided; it sizes its output from Y's extent along axis.
+    if (use_radix2) {
+      ORT_RETURN_IF_ERROR((fft_radix2<T, U>(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window,
+                                            is_onesided, inverse, V, temp_output)));
+    } else {
+      ORT_RETURN_IF_ERROR(
+          (dft_naive<T, U>(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, inverse)));
+    }
+  }
+
+  return Status::OK();
+}
+
+static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, bool is_onesided, bool inverse) {
+  // Kernel-level driver: reads the inputs, sizes and allocates output 0, then dispatches on element size/layout.
+  const auto* X = ctx->Input<Tensor>(0);
+  const auto* dft_length = ctx->Input<Tensor>(1);
+  const auto& X_shape = X->Shape();
+  const auto is_real_valued = is_real_valued_signal(X_shape);
+  const auto is_complex_valued = is_complex_valued_signal(X_shape);
+  axis = HandleNegativeAxis(axis, X_shape.NumDimensions());
+
+  int64_t number_of_samples = static_cast<int64_t>(X_shape[axis]);
+  if (dft_length) {  // optional input 1 overrides the length taken from X
+    const auto& dft_length_shape = dft_length->Shape();
+    ORT_RETURN_IF(!dft_length_shape.IsScalar(), "dft_length must be a scalar value.");
+    number_of_samples = signal::get_scalar_value_from_tensor<int64_t>(dft_length);  // no narrowing to int
+    ORT_RETURN_IF(number_of_samples <= 0, "dft_length must be greater than zero.");
+  }
+
+  // Get the DFT output size. Onesided will return only the unique values!
+  // note: x >> 1 === std::floor(x / 2.f)
+  auto dft_output_size = is_onesided ? ((number_of_samples >> 1) + 1) : number_of_samples;
+
+  // Output shape: X's shape with a trailing dimension of 2 and `axis` resized to the DFT output size.
+  auto Y_shape = onnxruntime::TensorShape(X_shape);
+  if (X_shape.NumDimensions() == 2) {
+    Y_shape = onnxruntime::TensorShape({X_shape[0], dft_output_size, 2});
+  } else {
+    Y_shape[Y_shape.NumDimensions() - 1] = 2;
+  }
+  Y_shape[axis] = dft_output_size;
+  auto Y = ctx->Output(0, Y_shape);
+
+  // The element size in bytes selects the float or double instantiation.
+  auto data_type = X->DataType();
+
+  auto element_size = data_type->Size();
+  if (element_size == sizeof(float)) {
+    InlinedVector<std::complex<float>> V;
+    InlinedVector<std::complex<float>> temp_output;
+    if (is_real_valued) {
+      ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, float>(ctx, X, Y, axis, number_of_samples, nullptr,
+                                                                    is_onesided, inverse, V, temp_output)));
+    } else if (is_complex_valued) {
+      ORT_RETURN_IF_ERROR((discrete_fourier_transform<float, std::complex<float>>(
+          ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
+    } else {
+      ORT_THROW(
+          "Unsupported input signal shape. The signal's first dimension must be the batch dimension and its second "
+          "dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for "
+          "complex inputs.",
+          data_type);
+    }
+  } else if (element_size == sizeof(double)) {
+    InlinedVector<std::complex<double>> V;
+    InlinedVector<std::complex<double>> temp_output;
+    if (is_real_valued) {
+      ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, double>(ctx, X, Y, axis, number_of_samples, nullptr,
+                                                                      is_onesided, inverse, V, temp_output)));
+    } else if (is_complex_valued) {
+      ORT_RETURN_IF_ERROR((discrete_fourier_transform<double, std::complex<double>>(
+          ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output)));
+    } else {
+      ORT_THROW(
+          "Unsupported input signal shape. The signal's first dimension must be the batch dimension and its second "
+          "dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for "
+          "complex inputs.",
+          data_type);
+    }
+  } else {
+    ORT_THROW("Unsupported input data type of ", data_type);
+  }
+
+  return Status::OK();
+}
+
+Status DFT::Compute(OpKernelContext* ctx) const {  // Runs the DFT with the attributes read at construction.
+  ORT_RETURN_IF_ERROR(discrete_fourier_transform(ctx, axis_, is_onesided_, is_inverse_));
+  return Status::OK();
+}
+
+template <typename T, typename U>
+static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_onesided, bool /*inverse*/) {
+  // Attr("onesided"): default = 1
+  // Input(0, "signal") type = T1
+  // Input(1, "frame_step") type = T2
+  // Input(2, "window") type = T1, optional
+  // Input(3, "frame_length") type = T2, optional
+  // Output(0, "output") type = T1
+  //
+  // Slides a window over each batch row of the signal and writes one DFT per frame into output 0.
+  const auto* signal = ctx->Input<Tensor>(0);
+  const auto frame_step = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(1));
+  const auto* window = ctx->Input<Tensor>(2);
+  const auto* frame_length_tensor = ctx->Input<Tensor>(3);
+  ORT_ENFORCE(frame_step > 0, "frame_step must be greater than zero.");
+
+  // Get input signal shape
+  const auto& signal_shape = signal->Shape();
+  const auto batch_size = signal_shape[0];
+  const auto signal_size = signal_shape[1];
+  const auto signal_components = signal_shape.NumDimensions() == 2   ? 1
+                                 : signal_shape.NumDimensions() == 3 ? signal_shape[2]
+                                                                     : 0;  // error
+  ORT_ENFORCE(signal_components == 1 || signal_components == 2,
+              "signal shape must end in 1 (real) or 2 (real, imaginary).");
+
+  // Get the frame length (INT64_MIN marks "not provided")
+  int64_t frame_length = std::numeric_limits<int64_t>::min();
+  if (frame_length_tensor) {
+    frame_length = signal::get_scalar_value_from_tensor<int64_t>(frame_length_tensor);
+  }
+
+  // Get window length (INT64_MIN marks "not provided")
+  int64_t window_length = std::numeric_limits<int64_t>::min();
+  if (window) {
+    window_length = window->Shape()[0];
+  }
+
+  // The frame_length and window inputs are generally used interchangeably, and should match!
+  if (frame_length != std::numeric_limits<int64_t>::min() && window_length != std::numeric_limits<int64_t>::min()) {
+    ORT_ENFORCE(
+        frame_length == window_length,
+        "If both frame_length and window are set, then the size of the window must be equal to the frame_length.");
+  }
+
+  // Calculate the window size with preference to the window input; reject a missing or non-positive size.
+  const auto window_size = window ? window->Shape()[0] : frame_length;
+  ORT_ENFORCE(window_size > 0, "Either window or frame_length must be provided with a positive length.");
+  ORT_ENFORCE(window_size <= signal_size, "Ensure that the dft size is smaller than the signal.");
+
+  // Calculate the number of dfts to run. Both operands are non-negative/positive here, so integer division is
+  // already the floor and avoids the rounding a float division would introduce for large signals.
+  const auto n_dfts = static_cast<int64_t>((signal_size - window_size) / frame_step + 1);
+
+  // Calculate the output spectra length (onesided will return only the unique values)
+  // note: x >> 1 === std::floor(x / 2.f)
+  const auto dft_output_size = is_onesided ? (window_size >> 1) + 1 : window_size;
+
+  // Get/create the output mutable data
+  auto output_spectra_shape = onnxruntime::TensorShape({batch_size, n_dfts, dft_output_size, 2});
+  auto Y = ctx->Output(0, output_spectra_shape);
+  auto Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
+
+  // Get/create the signal mutable data
+  auto* signal_data = const_cast<U*>(reinterpret_cast<const U*>(signal->DataRaw()));
+
+  // Define tensor shapes for each dft run
+  const int64_t output_components = 2;
+  auto dft_input_shape = onnxruntime::TensorShape({1, window_size, signal_components});
+  auto dft_output_shape = onnxruntime::TensorShape({1, dft_output_size, output_components});
+
+  InlinedVector<std::complex<T>> V;
+  InlinedVector<std::complex<T>> temp_output;
+
+  // Run each dft of each batch as if it was a real-valued batch size 1 dft operation
+  for (int64_t batch_idx = 0; batch_idx < batch_size; batch_idx++) {
+    for (int64_t i = 0; i < n_dfts; i++) {
+      auto input_frame_begin =
+          signal_data + (batch_idx * signal_size * signal_components) + (i * frame_step * signal_components);
+      auto output_frame_begin = Y_data + (batch_idx * n_dfts * dft_output_size * output_components) +
+                                (i * dft_output_size * output_components);
+
+      // Tensors do not own the backing memory, so no worries on destruction
+      auto input = onnxruntime::Tensor(signal->DataType(), dft_input_shape, input_frame_begin, signal->Location(), 0);
+      auto output = onnxruntime::Tensor(Y->DataType(), dft_output_shape, output_frame_begin, Y->Location(), 0);
+
+      // Run individual dft
+      ORT_RETURN_IF_ERROR((discrete_fourier_transform<T, U>(ctx, &input, &output, 1, window_size, window, is_onesided,
+                                                            false, V, temp_output)));
+    }
+  }
+
+  return Status::OK();
+}
+
+Status STFT::Compute(OpKernelContext* ctx) const {
+  // Attr("onesided"): default = 1
+  // Input(0, "signal") type = T1
+  // Input(1, "frame_step") type = T2
+  // Input(2, "window") type = T1, optional
+  // Input(3, "frame_length") type = T2, optional
+  // Output(0, "output") type = T1
+
+  // Classify the signal layout (real or complex) from its shape
+  const auto* signal = ctx->Input<Tensor>(0);
+  const auto& signal_shape = signal->Shape();
+  const auto is_real_valued = is_real_valued_signal(signal_shape);
+  const auto is_complex_valued = is_complex_valued_signal(signal_shape);
+
+  // The element size in bytes selects the float or double instantiation
+  auto data_type = signal->DataType();
+
+  const auto element_size = data_type->Size();
+  if (element_size == sizeof(float)) {
+    if (is_real_valued) {
+      ORT_RETURN_IF_ERROR((short_time_fourier_transform<float, float>(ctx, is_onesided_, false)));
+    } else if (is_complex_valued) {
+      ORT_RETURN_IF_ERROR((short_time_fourier_transform<float, std::complex<float>>(ctx, is_onesided_, false)));
+    } else {
+      ORT_THROW(
+          "Unsupported input signal shape. The signal's first dimension must be the batch dimension and its second "
+          "dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for "
+          "complex inputs.",
+          data_type);
+    }
+  } else if (element_size == sizeof(double)) {
+    if (is_real_valued) {
+      ORT_RETURN_IF_ERROR((short_time_fourier_transform<double, double>(ctx, is_onesided_, false)));
+    } else if (is_complex_valued) {
+      ORT_RETURN_IF_ERROR((short_time_fourier_transform<double, std::complex<double>>(ctx, is_onesided_, false)));
+    } else {
+      ORT_THROW(
+          "Unsupported input signal shape. The signal's first dimension must be the batch dimension and its second "
+          "dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for "
+          "complex inputs.",
+          data_type);
+    }
+  } else {
+    ORT_THROW("Unsupported input data type of ", data_type);
+  }
+
+  return Status::OK();
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/signal/dft.h b/onnxruntime/core/providers/cpu/signal/dft.h
similarity index 68%
rename from onnxruntime/contrib_ops/cpu/signal/dft.h
rename to onnxruntime/core/providers/cpu/signal/dft.h
index e177eb877ea7b..71cac52e37e8f 100644
--- a/onnxruntime/contrib_ops/cpu/signal/dft.h
+++ b/onnxruntime/core/providers/cpu/signal/dft.h
@@ -1,35 +1,28 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
 
 namespace onnxruntime {
-namespace contrib {
 
 class DFT final : public OpKernel {
   bool is_onesided_ = true;
   int64_t axis_ = 0;
   bool is_inverse_ = false;
+
  public:
   explicit DFT(const OpKernelInfo& info) : OpKernel(info) {
     is_onesided_ = static_cast<bool>(info.GetAttrOrDefault<int64_t>("onesided", 0));
-    axis_ = info.GetAttrOrDefault<int64_t>("axis", 0);
+    axis_ = info.GetAttrOrDefault<int64_t>("axis", 1);
     is_inverse_ = info.GetAttrOrDefault<int64_t>("inverse", 0);
   }
   Status Compute(OpKernelContext* ctx) const override;
 };
 
-class IDFT final : public OpKernel {
-  int64_t axis_ = 0;
- public:
-  explicit IDFT(const OpKernelInfo& info) : OpKernel(info) {
-    axis_ = info.GetAttrOrDefault<int64_t>("axis", 0);
-  }
-  Status Compute(OpKernelContext* ctx) const override;
-};
-
 class STFT final : public OpKernel {
   bool is_onesided_ = true;
+
  public:
   explicit STFT(const OpKernelInfo& info) : OpKernel(info) {
     is_onesided_ = static_cast<bool>(info.GetAttrOrDefault<int64_t>("onesided", 1));
@@ -37,7 +30,4 @@ class STFT final : public OpKernel {
   Status Compute(OpKernelContext* ctx) const override;
 };
 
-}  // namespace contrib
 }  // namespace onnxruntime
-
-#endif
diff --git a/onnxruntime/core/providers/cpu/signal/utils.h b/onnxruntime/core/providers/cpu/signal/utils.h
new file mode 100644
index 0000000000000..a5ff5df6e5d48
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/signal/utils.h
@@ -0,0 +1,30 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/framework/tensor.h"
+
+namespace onnxruntime {
+namespace signal {
+
+template <typename T>
+static T get_scalar_value_from_tensor(const Tensor* tensor) {
+  // Reads the single element of `tensor` (float, double, int32 or int64) and converts it to T; throws otherwise.
+  ORT_ENFORCE(tensor != nullptr && tensor->Shape().Size() == 1, "Expected a tensor holding exactly one value.");
+  switch (tensor->GetElementType()) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+      return static_cast<T>(*tensor->Data<float>());
+    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
+      return static_cast<T>(*tensor->Data<double>());
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32:
+      return static_cast<T>(*tensor->Data<int32_t>());
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
+      return static_cast<T>(*tensor->Data<int64_t>());
+    default:
+      ORT_THROW("Unsupported input data type of ", tensor->GetElementType());
+  }
+}
+
+}  // namespace signal
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/signal/window_functions.cc b/onnxruntime/core/providers/cpu/signal/window_functions.cc
new file mode 100644
index 0000000000000..4ddd76641a6eb
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/signal/window_functions.cc
@@ -0,0 +1,216 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/cpu/signal/window_functions.h"
+
+#include <cmath>
+
+#include "core/providers/common.h"
+#include "core/providers/cpu/signal/utils.h"
+
+namespace onnxruntime {
+ONNX_CPU_OPERATOR_KERNEL(HannWindow, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)  // NOTE(review): pairs input 0 with output 0; confirm this is intended
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  // integer types
+                             .TypeConstraint("T2",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         HannWindow);
+
+ONNX_CPU_OPERATOR_KERNEL(HammingWindow, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)  // NOTE(review): pairs input 0 with output 0; confirm this is intended
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  // integer types
+                             .TypeConstraint("T2",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         HammingWindow);
+
+ONNX_CPU_OPERATOR_KERNEL(BlackmanWindow, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)  // NOTE(review): pairs input 0 with output 0; confirm this is intended
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  // `size` input
+                             .TypeConstraint("T2",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         BlackmanWindow);
+
+ONNX_CPU_OPERATOR_KERNEL(MelWeightMatrix, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)  // NOTE(review): pairs input 0 with output 0; confirm this is intended
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  // integer types
+                             .TypeConstraint("T2", BuildKernelDefConstraints<float>())
+                             .TypeConstraint("T3",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         MelWeightMatrix);
+
+template <typename T>
+struct CosineSumWindow {
+  // Fills Y with w[i] = a0 - a1*cos(2*pi*i/N) + a2*cos(4*pi*i/N), N = size (periodic) or size - 1 (symmetric).
+  Status operator()(Tensor* Y, size_t size, float a0, float a1, float a2, bool is_periodic) {
+    auto* Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
+    // Full double-precision tau (2*pi); a constant truncated to 9 digits costs accuracy for double outputs.
+    constexpr double tau = 6.283185307179586476925286766559;
+    const size_t denominator = is_periodic ? size : size - 1;
+    if (denominator == 0) {  // empty window, or symmetric window of length 1: tau / 0 would produce NaN samples
+      if (size == 1) Y_data[0] = static_cast<T>(1);  // a one-sample window is its own peak (NumPy/SciPy convention)
+      return Status::OK();
+    }
+    const double angular_increment = tau / static_cast<double>(denominator);
+
+    for (size_t i = 0; i < size; i++) {
+      const double a2_component = a2 == 0 ? 0.0 : (a2 * std::cos(2 * angular_increment * i));
+      Y_data[i] = static_cast<T>(a0 - (a1 * std::cos(angular_increment * i)) + a2_component);
+    }
+    return Status::OK();
+  }
+};
+
+static Status create_cosine_sum_window(OpKernelContext* ctx, onnx::TensorProto_DataType output_datatype, float a0,
+                                       float a1, float a2, bool is_periodic) {
+  // Input 0 carries the number of samples the window should contain.
+  const auto window_length = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(0));
+  // Output 0 is a rank-1 tensor with one element per sample.
+  auto* window_tensor = ctx->Output(0, TensorShape({window_length}));
+
+  // Instantiate CosineSumWindow<T> for the element type named by output_datatype and let it fill the tensor.
+  using WindowTypes = utils::MLTypeCallDispatcher<float, double, int8_t, int16_t, int32_t, int64_t, uint8_t,
+                                                  uint16_t, uint32_t, uint64_t>;
+  return WindowTypes(output_datatype)
+      .InvokeRet<Status, CosineSumWindow>(window_tensor, window_length, a0, a1, a2, is_periodic);
+}
+
+Status HannWindow::Compute(OpKernelContext* ctx) const {
+  // A Hann window is the cosine-sum window w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) )
+  // with a_0 = a_1 = 0.5 and no a_2 term.
+  const float coeff0 = .5f;
+  const float coeff1 = coeff0;
+  const float coeff2 = 0;
+  return create_cosine_sum_window(ctx, data_type_, coeff0, coeff1, coeff2, is_periodic_);
+}
+
+Status HammingWindow::Compute(OpKernelContext* ctx) const {
+  // A Hamming window is the cosine-sum window w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) )
+  // with a_0 = 25/46, a_1 = 1 - a_0 and no a_2 term.
+  const float coeff0 = 25.f / 46.f;
+  const float coeff1 = 1 - coeff0;
+  const float coeff2 = 0;
+  return create_cosine_sum_window(ctx, data_type_, coeff0, coeff1, coeff2, is_periodic_);
+}
+
+Status BlackmanWindow::Compute(OpKernelContext* ctx) const {
+  // A Blackman window is the cosine-sum window w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) )
+  // with a_0 = 1/2 - alpha/2, a_1 = 1/2 and a_2 = alpha/2, using alpha = 0.16.
+  const float blackman_alpha = .16f;
+  const float coeff2 = blackman_alpha / 2.f;
+  const float coeff0 = .5f - coeff2;
+  const float coeff1 = .5f;
+  return create_cosine_sum_window(ctx, data_type_, coeff0, coeff1, coeff2, is_periodic_);
+}
+
+static inline double hz_to_mel_scale(double hz) { return 2595.0 * std::log10(1.0 + hz / 700.0); }  // Hz -> mel
+
+static inline double mel_scale_to_hz(double mels) { return 700.0 * (std::pow(10.0, mels / 2595.0) - 1.0); }  // mel -> Hz
+
+template <typename T>
+struct CreateMelWeightMatrix {
+  Status operator()(OpKernelContext* ctx, int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate,
+                    float lower_edge_hertz, float upper_edge_hertz) {
+    // Determine the width of the spectrogram.
+    // This is determined as half the size of the fft size. The first element of the spectrum is always retained,
+    // and the remaining are halved. The second half can be discarded due to the conjugate symmetry of the output with
+    // real valued ffts. Taken together the formula for the size of the output will be std::floor(dft_length / 2) + 1.
+    int64_t num_spectrogram_bins = static_cast<int64_t>(std::floor(dft_length / 2 + 1));
+
+    // Validate that both band edges map to a bin inside the spectrogram before the output is allocated.
+    auto lowest_index = std::floor(((dft_length + 1) * lower_edge_hertz) / sample_rate);
+    auto highest_index = std::floor(((dft_length + 1) * upper_edge_hertz) / sample_rate);
+    ORT_ENFORCE(
+        lowest_index >= 0 && lowest_index < num_spectrogram_bins,
+        "lower_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the "
+        "sample_rate.");
+    ORT_ENFORCE(
+        highest_index >= 0 && highest_index < num_spectrogram_bins,
+        "upper_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the "
+        "sample_rate.");
+
+    // Create the output shape: one row per spectrogram bin, one column per mel bin
+    TensorShape output_shape({static_cast<int64_t>(num_spectrogram_bins), num_mel_bins});
+    auto* Y = ctx->Output(0, output_shape);
+
+    // Get the raw output data
+    auto* Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
+
+    // Set the weight matrix to 0
+    memset(Y_data, 0, num_spectrogram_bins * num_mel_bins * sizeof(T));
+
+    // The mel filterbank is a triangular shaped peak with a height of 1 and a base equal to the size of the MEL range
+    // divided by the number of bins needed times 2. This triangle is then slid across the mel domain linearly, with a
+    // constant step size that is equal to half of the base of the triangle. To accommodate N bins, N+2 data points will
+    // be needed to determine the start, center and end points of each mel triangle filter.
+    //
+    // low_frequency where the mel triangle filter banks begin, and they end on the high_frequency_mel
+    // The range is divided evenly to create the needed points corresponding to the begin, center, end points of each
+    // triangle filterbank
+    InlinedVector<size_t> frequency_bins(num_mel_bins + 2);
+    auto low_frequency_mel = hz_to_mel_scale(lower_edge_hertz);
+    auto high_frequency_mel = hz_to_mel_scale(upper_edge_hertz);
+    auto mel_step = (high_frequency_mel - low_frequency_mel) / static_cast<float>(frequency_bins.size());
+
+    // Convert each point from mel scale back to hertz, and then compute the corresponding index in the fft
+    for (size_t i = 0; i < frequency_bins.size(); i++) {
+      auto hz = mel_scale_to_hz(low_frequency_mel + mel_step * i);
+      frequency_bins[i] = static_cast<size_t>(std::floor(((dft_length + 1) * hz) / sample_rate));
+    }
+
+    for (size_t i = 0; i < static_cast<size_t>(num_mel_bins); i++) {  // i selects the output column (mel bin)
+      auto lower_frequency_value = frequency_bins[i];       // left
+      auto center_frequency_point = frequency_bins[i + 1];  // center
+      auto higher_frequency_point = frequency_bins[i + 2];  // right
+
+      auto low_to_center = center_frequency_point - lower_frequency_value;
+      if (low_to_center == 0) {  // left and center fall in the same bin: write the peak directly
+        auto& current_element = *(Y_data + (center_frequency_point * num_mel_bins) + i);
+        current_element = static_cast<T>(1);
+      } else {
+        for (size_t j = lower_frequency_value; j <= center_frequency_point; j++) {  // rising edge, peak included
+          auto& current_element = *(Y_data + (j * num_mel_bins) + i);
+          current_element = static_cast<T>((j - lower_frequency_value) / static_cast<T>(low_to_center));
+        }
+      }
+
+      auto center_to_high = higher_frequency_point - center_frequency_point;
+      if (center_to_high > 0) {
+        for (size_t j = center_frequency_point; j < higher_frequency_point; j++) {  // falling edge, right excluded
+          auto& current_element = *(Y_data + (j * num_mel_bins) + i);
+          current_element = static_cast<T>((higher_frequency_point - j) / static_cast<T>(center_to_high));
+        }
+      }
+    }
+
+    return Status::OK();
+  }
+};
+
+static Status create_mel_weight_matrix(OpKernelContext* ctx, onnx::TensorProto_DataType output_datatype,
+                                       int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate,
+                                       float lower_edge_hertz, float upper_edge_hertz) {
+  // Run the CreateMelWeightMatrix<T> instantiation whose T matches the requested output element type.
+  using OutputTypes = utils::MLTypeCallDispatcher<float, double, int8_t, int16_t, int32_t, int64_t, uint8_t,
+                                                  uint16_t, uint32_t, uint64_t>;
+  return OutputTypes(output_datatype).InvokeRet<Status, CreateMelWeightMatrix>(
+      ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz);
+}
+
+Status MelWeightMatrix::Compute(OpKernelContext* ctx) const {
+  const auto mel_bin_count = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(0));
+  const auto fft_length = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(1));
+  const auto samples_per_second = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(2));
+  const auto band_low_hz = signal::get_scalar_value_from_tensor<float>(ctx->Input<Tensor>(3));
+  const auto band_high_hz = signal::get_scalar_value_from_tensor<float>(ctx->Input<Tensor>(4));
+  // Hand the five scalar inputs to the typed generator selected by the output_datatype attribute.
+  return create_mel_weight_matrix(ctx, data_type_, mel_bin_count, fft_length, samples_per_second, band_low_hz,
+                                  band_high_hz);
+}
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/signal/window_functions.h b/onnxruntime/core/providers/cpu/signal/window_functions.h
similarity index 62%
rename from onnxruntime/contrib_ops/cpu/signal/window_functions.h
rename to onnxruntime/core/providers/cpu/signal/window_functions.h
index 81d8d3b48c656..994149b5ced06 100644
--- a/onnxruntime/contrib_ops/cpu/signal/window_functions.h
+++ b/onnxruntime/core/providers/cpu/signal/window_functions.h
@@ -1,40 +1,53 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
 
 namespace onnxruntime {
-namespace contrib {
 
 class VariableOutputDataTypeBase : public OpKernel {
  protected:
   onnx::TensorProto_DataType data_type_;
 
  public:
-  VariableOutputDataTypeBase(const OpKernelInfo& info) : OpKernel(info) {
-    data_type_ = static_cast<onnx::TensorProto_DataType>(info.GetAttrOrDefault<int64_t>("output_datatype", onnx::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  explicit VariableOutputDataTypeBase(const OpKernelInfo& info) : OpKernel(info) {
+    data_type_ = static_cast<onnx::TensorProto_DataType>(  //
+        info.GetAttrOrDefault<int64_t>("output_datatype", onnx::TensorProto_DataType::TensorProto_DataType_FLOAT));
   }
 };
 
 class HannWindow final : public VariableOutputDataTypeBase {
  public:
   explicit HannWindow(const OpKernelInfo& info) : VariableOutputDataTypeBase(info) {
+    is_periodic_ = static_cast<bool>(info.GetAttrOrDefault<int64_t>("periodic", 1));
   }
   Status Compute(OpKernelContext* ctx) const override;
+
+ private:
+  bool is_periodic_ = true;
 };
 
 class HammingWindow final : public VariableOutputDataTypeBase {
  public:
   explicit HammingWindow(const OpKernelInfo& info) : VariableOutputDataTypeBase(info) {
+    is_periodic_ = static_cast<bool>(info.GetAttrOrDefault<int64_t>("periodic", 1));
   }
   Status Compute(OpKernelContext* ctx) const override;
+
+ private:
+  bool is_periodic_ = true;
 };
 
 class BlackmanWindow final : public VariableOutputDataTypeBase {
  public:
   explicit BlackmanWindow(const OpKernelInfo& info) : VariableOutputDataTypeBase(info) {
+    is_periodic_ = static_cast<bool>(info.GetAttrOrDefault<int64_t>("periodic", 1));
   }
   Status Compute(OpKernelContext* ctx) const override;
+
+ private:
+  bool is_periodic_ = true;
 };
 
 class MelWeightMatrix final : public VariableOutputDataTypeBase {
@@ -44,7 +57,4 @@ class MelWeightMatrix final : public VariableOutputDataTypeBase {
   Status Compute(OpKernelContext* ctx) const override;
 };
 
-}  // namespace contrib
 }  // namespace onnxruntime
-
-#endif
\ No newline at end of file
diff --git a/onnxruntime/test/contrib_ops/signal_ops_test.cc b/onnxruntime/test/contrib_ops/signal_ops_test.cc
deleted file mode 100644
index 3fe4ce75e604e..0000000000000
--- a/onnxruntime/test/contrib_ops/signal_ops_test.cc
+++ /dev/null
@@ -1,207 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#ifdef BUILD_MS_EXPERIMENTAL_OPS
-
-#include "gtest/gtest.h"
-#include "test/providers/provider_test_utils.h"
-
-namespace onnxruntime {
-namespace test {
-
-static void TestNaiveDFTFloat(bool is_onesided) {
-  OpTester test("DFT", 1, onnxruntime::kMSExperimentalDomain);
-
-  std::vector<int64_t> shape = {1, 5};
-  std::vector<int64_t> output_shape = {1, 5, 2};
-  output_shape[1] = is_onesided ? (1 + (shape[1] >> 1)) : shape[1];
-
-  std::vector<float> input = {1, 2, 3, 4, 5};
-  std::vector<float> expected_output = {
-    15.000000f, 0.0000000f,
-    -2.499999f, 3.4409550f,
-    -2.500000f, 0.8123000f,
-    -2.499999f, -0.812299f,
-    -2.500003f, -3.440953f
-  };
-
-  if (is_onesided) {
-    expected_output.resize(6);
-  }
-  test.AddInput<float>("input", shape, input);
-  test.AddAttribute<int64_t>("onesided", static_cast<int64_t>(is_onesided));
-  test.AddOutput<float>("output", output_shape, expected_output);
-  test.Run();
-}
-
-static void TestRadix2DFTFloat(bool is_onesided) {
-  OpTester test("DFT", 1, onnxruntime::kMSExperimentalDomain);
-
-  std::vector<int64_t> shape = {1, 8};
-  std::vector<int64_t> output_shape = {1, 8, 2};
-  output_shape[1] = is_onesided ? (1 + (shape[1] >> 1)) : shape[1];
-
-  std::vector<float> input = {1, 2, 3, 4, 5, 6, 7, 8};
-  std::vector<float> expected_output = {
-    36.000f, 0.000f,
-    -4.000f, 9.65685f,
-    -4.000f, 4.000f,
-    -4.000f, 1.65685f,
-    -4.000f, 0.000f,
-    -4.000f, -1.65685f,
-    -4.000f, -4.000f,
-    -4.000f, -9.65685f
-  };
-
-  if (is_onesided) {
-    expected_output.resize(10);
-  }
-  test.AddInput<float>("input", shape, input);
-  test.AddAttribute<int64_t>("onesided", static_cast<int64_t>(is_onesided));
-  test.AddOutput<float>("output", output_shape, expected_output);
-  test.Run();
-}
-
-TEST(MLSignalOpTest, DFTFloat) {
-  TestNaiveDFTFloat(false);
-  TestNaiveDFTFloat(true);
-  TestRadix2DFTFloat(false);
-  TestRadix2DFTFloat(true);
-}
-
-TEST(MLSignalOpTest, IDFTFloat) {
-  OpTester test("IDFT", 1, onnxruntime::kMSExperimentalDomain);
-  
-  std::vector<int64_t> shape = {1, 5, 2};
-  std::vector<float> input =
-  {
-    15.000000f, 0.0000000f,
-    -2.499999f, 3.4409550f,
-    -2.500000f, 0.8123000f,
-    -2.499999f, -0.812299f,
-    -2.500003f, -3.440953f
-  };
-  std::vector<float> expected_output =
-  {
-      1.000f, 0.000f,
-      2.000f, 0.000f,
-      3.000f, 0.000f,
-      4.000f, 0.000f,
-      5.000f, 0.000f
-  };
-  
-  test.AddInput<float>("input", shape, input);
-  test.AddOutput<float>("output", shape, expected_output);
-  test.Run();
-}
-
-TEST(MLSignalOpTest, STFTFloat) {
-  OpTester test("STFT", 1, onnxruntime::kMSExperimentalDomain);
-
-  std::vector<float> signal(64, 1);
-  test.AddInput<float>("signal", {1, 64}, signal);
-  std::vector<float> window(16, 1);
-  test.AddInput<float>("window", {16}, window);
-  test.AddInput<int64_t>("frame_length", {}, {16});
-  test.AddInput<int64_t>("frame_step", {}, {8});
-
-  std::vector<int64_t> output_shape = {1, 7, 9, 2};
-  std::vector<float> expected_output =
-  {
-    16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
-    16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
-    16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
-    16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
-    16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
-    16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
-    16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f
-  };
-  test.AddOutput<float>("output", output_shape, expected_output);
-  test.Run();
-}
-
-TEST(MLSignalOpTest, HannWindowFloat) {
-  OpTester test("HannWindow", 1, onnxruntime::kMSExperimentalDomain);
-
-  std::vector<int64_t> scalar_shape = {};
-  std::vector<int64_t> output_shape = {32};
-  std::vector<float> expected_output =
-  {
-    0.000000f, 0.009607f, 0.038060f, 0.084265f, 0.146447f, 0.222215f, 0.308658f, 0.402455f,
-    0.500000f, 0.597545f, 0.691342f, 0.777785f, 0.853553f, 0.915735f, 0.961940f, 0.990393f,
-    1.000000f, 0.990393f, 0.961940f, 0.915735f, 0.853553f, 0.777785f, 0.691342f, 0.597545f,
-    0.500000f, 0.402455f, 0.308658f, 0.222215f, 0.146447f, 0.084265f, 0.038060f, 0.009607f
-  };
-
-  test.AddInput<int64_t>("size", scalar_shape, {32});
-  test.AddOutput<float>("output", output_shape, expected_output);
-  test.Run();
-}
-
-TEST(MLSignalOpTest, HammingWindowFloat) {
-  OpTester test("HammingWindow", 1, onnxruntime::kMSExperimentalDomain);
-  
-  std::vector<int64_t> scalar_shape = {};
-  std::vector<int64_t> output_shape = {32};
-  std::vector<float> expected_output =
-  {
-    0.086957f, 0.095728f, 0.121707f, 0.163894f, 0.220669f, 0.289848f, 0.368775f, 0.454415f,
-    0.543478f, 0.632541f, 0.718182f, 0.797108f, 0.866288f, 0.923062f, 0.965249f, 0.991228f,
-    1.000000f, 0.991228f, 0.965249f, 0.923062f, 0.866288f, 0.797108f, 0.718182f, 0.632541f,
-    0.543478f, 0.454415f, 0.368775f, 0.289848f, 0.220669f, 0.163894f, 0.121707f, 0.095728f
-  };
-
-  test.AddInput<int64_t>("size", scalar_shape, {32});
-  test.AddOutput<float>("output", output_shape, expected_output);
-  test.Run();
-}
-
-TEST(MLSignalOpTest, BlackmanWindowFloat) {
-  OpTester test("BlackmanWindow", 1, onnxruntime::kMSExperimentalDomain);
-  
-  std::vector<int64_t> scalar_shape = {};
-  std::vector<int64_t> output_shape = {32};
-  std::vector<float> expected_output =
-  {
-    0.000000f, 0.003518f, 0.014629f, 0.034880f, 0.066447f, 0.111600f, 0.172090f, 0.248544f,
-    0.340000f, 0.443635f, 0.554773f, 0.667170f, 0.773553f, 0.866350f, 0.938508f, 0.984303f,
-    1.000000f, 0.984303f, 0.938508f, 0.866350f, 0.773553f, 0.667170f, 0.554773f, 0.443635f,
-    0.340000f, 0.248544f, 0.172090f, 0.111600f, 0.066447f, 0.034880f, 0.014629f, 0.003518f
-  };
-
-  test.AddInput<int64_t>("size", scalar_shape, {32});
-  test.AddOutput<float>("output", output_shape, expected_output);
-  test.Run();
-}
-
-TEST(MLSignalOpTest, MelWeightMatrixFloat) {
-  OpTester test("MelWeightMatrix", 1, onnxruntime::kMSExperimentalDomain);
-
-  std::vector<int64_t> scalar_shape = {};
-  std::vector<int64_t> output_shape = {9, 8};
-  std::vector<float> expected_output =
-  {
-    1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
-    0.000000f, 0.000000f, 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
-    0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f,
-    0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f,
-    0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f,
-    0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f,
-    0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
-    0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
-    0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f
-  };
-
-  test.AddInput<int64_t>("num_mel_bins", scalar_shape, {8});
-  test.AddInput<int64_t>("dft_length", scalar_shape, {16});
-  test.AddInput<int64_t>("sample_rate", scalar_shape, {8192});
-  test.AddInput<float>("lower_edge_hertz", scalar_shape, {0});
-  test.AddInput<float>("upper_edge_hertz", scalar_shape, {8192 / 2.f});
-  test.AddOutput<float>("output", output_shape, expected_output);
-  test.Run();
-}
-
-}  // namespace test
-}  // namespace onnxruntime
-
-#endif
\ No newline at end of file
diff --git a/onnxruntime/test/providers/cpu/signal/signal_ops_test.cc b/onnxruntime/test/providers/cpu/signal/signal_ops_test.cc
new file mode 100644
index 0000000000000..2db126b140607
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/signal/signal_ops_test.cc
@@ -0,0 +1,242 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <functional>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/common/tensor_op_test_utils.h"
+#include "test/providers/provider_test_utils.h"
+#include "test/util/include/test_random_seed.h"
+
+using std::vector;
+
+namespace onnxruntime {
+namespace test {
+
+static const int kMinOpsetVersion = 17;
+
+static void TestNaiveDFTFloat(bool onesided) {
+  OpTester test("DFT", kMinOpsetVersion);
+
+  vector<int64_t> shape = {1, 5, 1};
+  vector<int64_t> output_shape = {1, 5, 2};
+  output_shape[1] = onesided ? (1 + (shape[1] >> 1)) : shape[1];
+
+  vector<float> input = {1, 2, 3, 4, 5};
+  vector<float> expected_output = {15.000000f, 0.0000000f, -2.499999f, 3.4409550f, -2.500000f,
+                                   0.8123000f, -2.499999f, -0.812299f, -2.500003f, -3.440953f};
+
+  if (onesided) {
+    expected_output.resize(6);
+  }
+  test.AddInput<float>("input", shape, input);
+  test.AddAttribute<int64_t>("onesided", static_cast<int64_t>(onesided));
+  test.AddOutput<float>("output", output_shape, expected_output);
+  test.Run();
+}
+
+static void TestRadix2DFTFloat(bool onesided) {
+  OpTester test("DFT", kMinOpsetVersion);
+
+  vector<int64_t> shape = {1, 8, 1};
+  vector<int64_t> output_shape = {1, 8, 2};
+  output_shape[1] = onesided ? (1 + (shape[1] >> 1)) : shape[1];
+
+  vector<float> input = {1, 2, 3, 4, 5, 6, 7, 8};
+  vector<float> expected_output = {36.000f, 0.000f, -4.000f, 9.65685f, -4.000f, 4.000f, -4.000f, 1.65685f,
+                                   -4.000f, 0.000f, -4.000f, -1.65685f, -4.000f, -4.000f, -4.000f, -9.65685f};
+
+  if (onesided) {
+    expected_output.resize(10);
+  }
+  test.AddInput<float>("input", shape, input);
+  test.AddAttribute<int64_t>("onesided", static_cast<int64_t>(onesided));
+  test.AddOutput<float>("output", output_shape, expected_output);
+  test.Run();
+}
+
+TEST(SignalOpsTest, DFTFloat_naive) { TestNaiveDFTFloat(false); }
+
+TEST(SignalOpsTest, DFTFloat_naive_onesided) { TestNaiveDFTFloat(true); }
+
+TEST(SignalOpsTest, DFTFloat_radix2) { TestRadix2DFTFloat(false); }
+
+TEST(SignalOpsTest, DFTFloat_radix2_onesided) { TestRadix2DFTFloat(true); }
+
+TEST(SignalOpsTest, DFTFloat_inverse) {
+  OpTester test("DFT", kMinOpsetVersion);
+
+  vector<int64_t> shape = {1, 5, 2};
+  vector<float> input = {15.000000f, 0.0000000f, -2.499999f, 3.4409550f, -2.500000f,
+                         0.8123000f, -2.499999f, -0.812299f, -2.500003f, -3.440953f};
+  vector<float> expected_output = {1.000f, 0.000f, 2.000f, 0.000f, 3.000f, 0.000f, 4.000f, 0.000f, 5.000f, 0.000f};
+
+  test.AddInput<float>("input", shape, input);
+  test.AddAttribute<int64_t>("inverse", static_cast<int64_t>(true));
+  test.AddOutput<float>("output", shape, expected_output);
+  test.Run();
+}
+
+// Tests that DFT(DFT(x), inverse=true) == x, i.e. the inverse transform undoes the forward one.
+static void TestDFTInvertible(bool complex) {
+  // TODO: test dft_length; no dft_length input is supplied and only axis == 1 is exercised below.
+  class DFTInvertibleTester : public OpTester {
+   public:
+    DFTInvertibleTester(int64_t axis) : OpTester("DFT", kMinOpsetVersion), axis_(axis) {}
+
+   protected:
+    void AddNodes(Graph& graph, vector<NodeArg*>& graph_inputs, vector<NodeArg*>& graph_outputs,
+                  vector<std::function<void(Node& node)>>& add_attribute_funcs) override {
+      // Create an intermediate output
+      vector<NodeArg*> intermediate_outputs{&graph.GetOrCreateNodeArg("dft_output", graph_outputs[0]->TypeAsProto())};
+
+      // Add the forward DFT node via the base class. NOTE(review): "axis" is registered only afterwards; confirm it applies.
+      OpTester::AddNodes(graph, graph_inputs, intermediate_outputs, add_attribute_funcs);
+      OpTester::AddAttribute("axis", axis_);
+
+      Node& inverse = graph.AddNode("inverse", "DFT", "inverse", intermediate_outputs, graph_outputs);
+      inverse.AddAttribute("inverse", static_cast<int64_t>(true));
+      inverse.AddAttribute("axis", axis_);
+    }
+
+   private:
+    int64_t axis_;
+  };
+
+  RandomValueGenerator random(GetTestRandomSeed());
+  // TODO(garymm, smk2007): Add tests for different dft_length values.
+  const int64_t num_batches = 2;
+  for (int64_t axis = 1; axis < 2; axis += 1) {
+    for (int64_t signal_dim1 = 1; signal_dim1 <= 4; signal_dim1 += 1) {
+      for (int64_t signal_dim2 = 1; signal_dim2 <= 4; signal_dim2 += 1) {
+        DFTInvertibleTester test(axis);
+        vector<int64_t> input_shape{num_batches, signal_dim1, signal_dim2, 1 + complex};
+        vector<float> input_data = random.Uniform<float>(input_shape, -100.f, 100.f);
+        test.AddInput("input", input_shape, input_data);
+
+        vector<int64_t> output_shape(input_shape);
+        vector<float>* output_data_p;
+        vector<float> output_data;
+        if (complex) {
+          output_data_p = &input_data;
+        } else {  // real -> (real, imaginary) with imaginary == 0.
+          output_shape[3] = 2;
+          output_data.resize(input_data.size() * 2, 0);
+          for (size_t i = 0; i < input_data.size(); i += 1) {
+            output_data[i * 2] = input_data[i];
+          }
+          output_data_p = &output_data;
+        }
+        test.AddOutput<float>("output", output_shape, *output_data_p);
+        test.Run();
+      }
+    }
+  }
+}
+
+TEST(SignalOpsTest, DFT_invertible_real) { TestDFTInvertible(false); }
+
+TEST(SignalOpsTest, DFT_invertible_complex) { TestDFTInvertible(true); }
+
+TEST(SignalOpsTest, STFTFloat) {
+  OpTester test("STFT", kMinOpsetVersion);
+
+  vector<float> signal(64, 1);
+  test.AddInput<float>("signal", {1, 64, 1}, signal);
+  test.AddInput<int64_t>("frame_step", {}, {8});
+  vector<float> window(16, 1);
+  test.AddInput<float>("window", {16}, window);
+  test.AddInput<int64_t>("frame_length", {}, {16});
+
+  vector<int64_t> output_shape = {1, 7, 9, 2};
+  vector<float> expected_output = {
+      16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f,
+      0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f};
+  test.AddOutput<float>("output", output_shape, expected_output);
+  test.Run();
+}
+
+TEST(SignalOpsTest, HannWindowFloat) {
+  OpTester test("HannWindow", kMinOpsetVersion);
+
+  vector<int64_t> scalar_shape = {};
+  vector<int64_t> output_shape = {32};
+  vector<float> expected_output = {0.000000f, 0.009607f, 0.038060f, 0.084265f, 0.146447f, 0.222215f, 0.308658f,
+                                   0.402455f, 0.500000f, 0.597545f, 0.691342f, 0.777785f, 0.853553f, 0.915735f,
+                                   0.961940f, 0.990393f, 1.000000f, 0.990393f, 0.961940f, 0.915735f, 0.853553f,
+                                   0.777785f, 0.691342f, 0.597545f, 0.500000f, 0.402455f, 0.308658f, 0.222215f,
+                                   0.146447f, 0.084265f, 0.038060f, 0.009607f};
+
+  test.AddInput<int64_t>("size", scalar_shape, {32});
+  test.AddOutput<float>("output", output_shape, expected_output);
+  test.Run();
+}
+
+TEST(SignalOpsTest, HammingWindowFloat) {
+  OpTester test("HammingWindow", kMinOpsetVersion);
+
+  vector<int64_t> scalar_shape = {};
+  vector<int64_t> output_shape = {32};
+  vector<float> expected_output =  //
+      {0.086957f, 0.095728f, 0.121707f, 0.163894f, 0.220669f, 0.289848f, 0.368775f, 0.454415f,
+       0.543478f, 0.632541f, 0.718182f, 0.797108f, 0.866288f, 0.923062f, 0.965249f, 0.991228f,
+       1.000000f, 0.991228f, 0.965249f, 0.923062f, 0.866288f, 0.797108f, 0.718182f, 0.632541f,
+       0.543478f, 0.454415f, 0.368775f, 0.289848f, 0.220669f, 0.163894f, 0.121707f, 0.095728f};
+
+  test.AddInput<int64_t>("size", scalar_shape, {32});
+  test.AddOutput<float>("output", output_shape, expected_output);
+  test.Run();
+}
+
+TEST(SignalOpsTest, BlackmanWindowFloat) {
+  OpTester test("BlackmanWindow", kMinOpsetVersion);
+
+  vector<int64_t> scalar_shape = {};
+  vector<int64_t> output_shape = {32};
+  vector<float> expected_output =  //
+      {0.000000f, 0.003518f, 0.014629f, 0.034880f, 0.066447f, 0.111600f, 0.172090f, 0.248544f,
+       0.340000f, 0.443635f, 0.554773f, 0.667170f, 0.773553f, 0.866350f, 0.938508f, 0.984303f,
+       1.000000f, 0.984303f, 0.938508f, 0.866350f, 0.773553f, 0.667170f, 0.554773f, 0.443635f,
+       0.340000f, 0.248544f, 0.172090f, 0.111600f, 0.066447f, 0.034880f, 0.014629f, 0.003518f};
+
+  test.AddInput<int64_t>("size", scalar_shape, {32});
+  test.AddOutput<float>("output", output_shape, expected_output);
+  test.Run();
+}
+
+TEST(SignalOpsTest, MelWeightMatrixFloat) {
+  OpTester test("MelWeightMatrix", kMinOpsetVersion);
+
+  vector<int64_t> scalar_shape = {};
+  vector<int64_t> output_shape = {9, 8};
+  vector<float> expected_output = {
+      1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
+      0.000000f, 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
+      0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
+      0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
+      0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
+      0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
+      0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f,
+      0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f};
+
+  test.AddInput<int64_t>("num_mel_bins", scalar_shape, {8});
+  test.AddInput<int64_t>("dft_length", scalar_shape, {16});
+  test.AddInput<int64_t>("sample_rate", scalar_shape, {8192});
+  test.AddInput<float>("lower_edge_hertz", scalar_shape, {0});
+  test.AddInput<float>("upper_edge_hertz", scalar_shape, {8192 / 2.f});
+  test.AddOutput<float>("output", output_shape, expected_output);
+  test.Run();
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json b/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json
index 399e26fb35fd6..d9271921c1e90 100644
--- a/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json
+++ b/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json
@@ -299,6 +299,10 @@
         "BitShift ai.onnx CPUExecutionProvider",
         8765933529403563240
     ],
+    [
+        "BlackmanWindow ai.onnx CPUExecutionProvider",
+        4230790036355038984
+    ],
     [
         "Cast ai.onnx CPUExecutionProvider",
         4892631558605514456
@@ -463,6 +467,10 @@
         "Det ai.onnx CPUExecutionProvider",
         4355346295804324544
     ],
+    [
+        "DFT ai.onnx CPUExecutionProvider",
+        2809655513372322840
+    ],
     [
         "Div ai.onnx CPUExecutionProvider",
         3765227735719542728
@@ -911,7 +919,7 @@
         "GreaterOrEqual ai.onnx CPUExecutionProvider",
         17416867432093505280
     ],
-[
+    [
         "GreaterOrEqual ai.onnx CPUExecutionProvider",
         4445196831337347808
     ],
@@ -926,7 +934,7 @@
     [
         "GreaterOrEqual ai.onnx CPUExecutionProvider",
         16172564801671050120
-    ],	
+    ],
     [
         "GridSample ai.onnx CPUExecutionProvider",
         15150264021585158264
@@ -939,6 +947,14 @@
         "GRU ai.onnx CPUExecutionProvider",
         2706165712066264784
     ],
+    [
+        "HammingWindow ai.onnx CPUExecutionProvider",
+        7960927909626268504
+    ],
+    [
+        "HannWindow ai.onnx CPUExecutionProvider",
+        11998243503561799520
+    ],
     [
         "Hardmax ai.onnx CPUExecutionProvider",
         3471079605532327368
@@ -1018,7 +1034,7 @@
     [
         "LeakyRelu ai.onnx CPUExecutionProvider",
         830582302303937272
-    ],	
+    ],
     [
         "Less ai.onnx CPUExecutionProvider",
         2529281912870061232
@@ -1090,7 +1106,7 @@
     [
         "LessOrEqual ai.onnx CPUExecutionProvider",
         15565321713560893128
-    ],	
+    ],
     [
         "Log ai.onnx CPUExecutionProvider",
         268464912229648680
@@ -1287,6 +1303,10 @@
         "MeanVarianceNormalization ai.onnx CPUExecutionProvider",
         17242016597551698064
     ],
+    [
+        "MelWeightMatrix ai.onnx CPUExecutionProvider",
+        1589563865873170600
+    ],
     [
         "Min ai.onnx CPUExecutionProvider",
         5444634510407971152
@@ -1586,7 +1606,7 @@
     [
         "PRelu ai.onnx CPUExecutionProvider",
         17872917958807301128
-    ],	
+    ],
     [
         "QLinearConv ai.onnx CPUExecutionProvider",
         1301685544574905024
@@ -2230,7 +2250,7 @@
     [
         "Scan ai.onnx CPUExecutionProvider",
         220271302879298784
-    ],	
+    ],
     [
         "Scatter ai.onnx CPUExecutionProvider",
         15759064509848656392
@@ -2447,6 +2467,10 @@
         "Squeeze ai.onnx CPUExecutionProvider",
         16122603335179721968
     ],
+    [
+        "STFT ai.onnx CPUExecutionProvider",
+        1739051453790648552
+    ],
     [
         "StringNormalizer ai.onnx CPUExecutionProvider",
         7767393334034626736
@@ -2698,9 +2722,9 @@
     [
         "Where ai.onnx CPUExecutionProvider",
         17544214758602217832
-    ],	
+    ],
     [
         "Xor ai.onnx CPUExecutionProvider",
         14631049987911195736
     ]
-]
\ No newline at end of file
+]
diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
index 091c573d38ae0..95b3516312f16 100644
--- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
@@ -4,7 +4,6 @@
     //
     // Tests that are failing temporarily and should be fixed
     "current_failing_tests": [
-        "^test_(blackmanwindow|dft|hammingwindow|hannwindow|melweightmatrix|stft).*", // https://github.com/microsoft/onnxruntime/pull/11778
         "^test_adagrad",
         "^test_adagrad_multiple",
         "^test_batchnorm_epsilon_old",
diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc
index 921e491b63510..8b2ec0246809e 100644
--- a/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc
@@ -5,7 +5,9 @@
     // Val: float, max absolute difference between expected and actual.
     "atol_overrides": {
         "test_dft": 1e-4,
-        "test_dft_axis": 1e-4
+        "test_dft_axis": 1e-4,
+        "test_stft": 1e-4,
+        "test_stft_with_window": 1e-4
     },
     // Key: str, the name of the test as defined by ONNX without any device suffix.
     // Val: float, max relative difference between expected and actual.