From 39c1b86fec9f5ce016025356e3dfd9410853d231 Mon Sep 17 00:00:00 2001 From: Jade Cho Date: Tue, 8 Oct 2024 02:52:20 +0900 Subject: [PATCH] [GPU] Use ifm size of fc for dynamic quantize (#26850) ### Details: - *DynamicQuantize requires the FC ifm size to generate the kernel.* - *But it cannot be obtained with fully dynamic input shape, so it has been queried from fc.* ### Tickets: - *152019* --------- Co-authored-by: Mingyu Kim --- .../intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp | 8 ++++++++ .../dynamic_quantize/dynamic_quantize_kernel_opt.cpp | 6 ++++++ .../dynamic_quantize/dynamic_quantize_kernel_ref.h | 1 + 3 files changed, 15 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp index 91f141ae062723..c3d436eb9c9b8d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp @@ -7,6 +7,7 @@ #include "dynamic_quantize/dynamic_quantize_kernel_ref.h" #include "dynamic_quantize/dynamic_quantize_kernel_selector.h" #include "dynamic_quantize_inst.h" +#include "fully_connected_inst.h" namespace cldnn { namespace ocl { @@ -37,6 +38,13 @@ struct dynamic_quantize_impl : typed_primitive_impl_ocl { auto params = get_default_params(impl_param, is_shape_agnostic); params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); + // In some models, the feature size of input0 can be dynamic. + // In that case, the IFM value of the fully connected weights is used instead. 
+ auto user_node = impl_param.prog->get_node(impl_param.desc->id).get_users().front(); + if (user_node != nullptr && user_node->is_type()) { + auto& fc_node = user_node->as(); + params.fc_ifm_size = fc_node.weights().get_output_layout().feature(); + } return params; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp index d4b5268eaee4e4..6a678770e85d72 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp @@ -19,6 +19,12 @@ static std::pair get_input_bf_size(const dynamic_quantize_params input_batch = params.inputs[0].Batch().v * params.inputs[0].Feature().v; } + // In some models, input_f of input0 can be dynamic. In that case, the IFM value of the FC weights is used instead. + if (params.inputs[0].is_dynamic() && input_f == 0) { + OPENVINO_ASSERT(params.fc_ifm_size != 0, "[GPU] Invalid fc_ifm_size value"); + input_f = params.fc_ifm_size; + } + return {input_batch, input_f}; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h index ce52ed9fb19714..c46b6b2685a940 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.h @@ -12,6 +12,7 @@ namespace kernel_selector { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct dynamic_quantize_params : public base_params { dynamic_quantize_params() : base_params(KernelType::DYNAMIC_QUANTIZE) {} + size_t fc_ifm_size = 0; }; class 
DynamicQuantizeKernelRef : public KernelBaseOpenCL {