Skip to content

Commit

Permalink
[GPU] Use ifm size of fc for dynamic quantize (#26850)
Browse files Browse the repository at this point in the history
### Details:
 - *DynamicQuantize requires the FC IFM size to generate the kernel.*
 - *However, that size cannot be obtained from a fully dynamic input shape, so it
is queried from the FC node instead.*

### Tickets:
 - *152019*

---------

Co-authored-by: Mingyu Kim <mingyu.kim@intel.com>
  • Loading branch information
jade-cho and isanghao authored Oct 7, 2024
1 parent 46a6ccd commit 39c1b86
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "dynamic_quantize/dynamic_quantize_kernel_ref.h"
#include "dynamic_quantize/dynamic_quantize_kernel_selector.h"
#include "dynamic_quantize_inst.h"
#include "fully_connected_inst.h"

namespace cldnn {
namespace ocl {
Expand Down Expand Up @@ -37,6 +38,13 @@ struct dynamic_quantize_impl : typed_primitive_impl_ocl<dynamic_quantize> {
auto params = get_default_params<kernel_selector::dynamic_quantize_params>(impl_param, is_shape_agnostic);
params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1)));

// In some models, the feature size of input0 can be dynamic.
// In that case, take the IFM value from the weights of the fully connected user.
auto user_node = impl_param.prog->get_node(impl_param.desc->id).get_users().front();
if (user_node != nullptr && user_node->is_type<fully_connected>()) {
auto& fc_node = user_node->as<fully_connected>();
params.fc_ifm_size = fc_node.weights().get_output_layout().feature();
}
return params;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ static std::pair<size_t, size_t> get_input_bf_size(const dynamic_quantize_params
input_batch = params.inputs[0].Batch().v * params.inputs[0].Feature().v;
}

// In some models, input_f can be dynamic in input0. In that case, use the IFM value of the FC weights (fc_ifm_size).
if (params.inputs[0].is_dynamic() && input_f == 0) {
OPENVINO_ASSERT(params.fc_ifm_size != 0, "[GPU] Invalid fc_ifm_size value");
input_f = params.fc_ifm_size;
}

return {input_batch, input_f};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ namespace kernel_selector {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct dynamic_quantize_params : public base_params {
    // IFM size taken from the weights of the fully connected user of this primitive.
    // Stays 0 when no value was supplied; get_input_bf_size() asserts it is non-zero
    // before using it as input_f for a dynamic input0 whose input_f is 0.
    size_t fc_ifm_size{0};

    // Tags the params as belonging to the DYNAMIC_QUANTIZE kernel type.
    dynamic_quantize_params()
        : base_params(KernelType::DYNAMIC_QUANTIZE) {}
};

class DynamicQuantizeKernelRef : public KernelBaseOpenCL {
Expand Down

0 comments on commit 39c1b86

Please sign in to comment.