[Mixed] Mixed Precision Layer update
This PR updates the mixed precision layer.
- integrates #2568 and #2455
- more tests will be updated in a follow-up

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <donghak.park@samsung.com>
DonghakPark committed May 17, 2024
1 parent 72917b8 commit 40a5233
Showing 13 changed files with 39 additions and 302 deletions.
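For context on the mixed precision work this commit is part of: when activations and gradients are kept in FP16, small derivative values can underflow to zero, so the loss derivative is typically multiplied by a scale factor and divided back out before the optimizer step (the applyLossScale / setLossScale code touched below belongs to that machinery). The sketch below only illustrates the idea; the names and the fixed scale factor are assumptions, not nntrainer code.

```cpp
// Illustrative sketch of loss scaling in mixed precision training.
// All names and the scale value are hypothetical; this is not nntrainer code.
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const float loss_scale = 1024.0f; // hypothetical fixed scale factor
  std::vector<float> grad = {1e-5f, -3e-6f, 2e-5f}; // derivative from the loss

  // 1. Scale the derivative produced by the loss layer so small values
  //    survive a cast to FP16 without flushing to zero.
  std::vector<float> scaled(grad);
  for (auto &g : scaled)
    g *= loss_scale;

  // 2. Backpropagation would run on the scaled values (omitted here).

  // 3. Unscale before the optimizer applies the update to the FP32
  //    master copy of the weights.
  for (auto &g : scaled)
    g /= loss_scale;

  for (std::size_t i = 0; i < scaled.size(); ++i)
    std::printf("grad[%zu]: %g -> round trip %g\n", i, grad[i], scaled[i]);
  return 0;
}
```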
51 changes: 0 additions & 51 deletions nntrainer/layers/layer_context.cpp
@@ -157,16 +157,6 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
   return weights[idx]->getVariableRef();
 }

-/**
- * @brief Get the Weight tensor object
- *
- * @param idx Identifier of the weight
- * @return Tensor& Reference to the weight tensor
- */
-Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
-  return weights[idx]->getVariableMasterRef();
-}
-
 /**
  * @brief Get the Weight Gradient tensor object
  *
@@ -205,18 +195,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
   return weights[idx]->getOptimizerVariableRef(jdx);
 }

-/**
- * @brief Get the Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @param jdx Identifier of the optimizer variables
- * @return Tensor& Reference to the weight optimizer variable tensor
- */
-Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
-                                               unsigned int jdx) const {
-  return weights[idx]->getOptimizerMasterVariableRef(jdx);
-}
-
 /**
  * @brief Get the Number of Weight Optimizer Variable tensor object
  *
@@ -227,16 +205,6 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
   return weights[idx]->getNumOptVariable();
 }

-/**
- * @brief Get the Number of Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @return int Number of the weight optimizer variable
- */
-unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
-  return weights[idx]->getNumOptMasterVariable();
-}
-
 /**
  * @brief Get regularization loss for the weight
  *
@@ -376,25 +344,6 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
   return getInputGrad(idx);
 }

-bool RunLayerContext::validateDerivatives() {
-  auto num_in = getNumInputs();
-  auto num_out = getNumOutputs();
-
-  for (unsigned int i = 0; i < num_in; ++i) {
-    auto deriv = getIncomingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  for (unsigned int i = 0; i < num_out; ++i) {
-    auto deriv = getOutgoingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  return true;
-}
-
 /**
  * @brief Get the Tensor object
  *
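The getWeightMaster and getWeightOptMasterVar accessors removed above expose the other half of the usual mixed precision recipe: an FP32 "master" copy of each low-precision weight, so that small optimizer updates are not lost to rounding. A minimal sketch of that pattern follows, using hypothetical names and a simulated low-precision type rather than nntrainer's Weight and Tensor classes.

```cpp
// Hypothetical sketch of the FP32 master-weight pattern used in mixed
// precision training; illustrative only, not nntrainer code.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Simulate a low-precision copy by truncating an FP32 value to bfloat16
// (keep only the top 16 bits of the bit pattern). Real code would use FP16.
float to_low_precision(float v) {
  std::uint32_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  bits &= 0xFFFF0000u;
  std::memcpy(&v, &bits, sizeof(bits));
  return v;
}

struct MixedWeight {
  std::vector<float> master; // FP32 master copy, owned by the optimizer
  std::vector<float> value;  // low-precision copy used in forward/backward
};

void sgd_step(MixedWeight &w, const std::vector<float> &grad, float lr) {
  for (std::size_t i = 0; i < w.master.size(); ++i) {
    w.master[i] -= lr * grad[i];                // accumulate update in FP32
    w.value[i] = to_low_precision(w.master[i]); // refresh the compute copy
  }
}

int main() {
  MixedWeight w{{1.0f}, {1.0f}};
  for (int step = 0; step < 100; ++step)
    sgd_step(w, {1e-4f}, 0.1f); // updates this small would vanish in bf16 alone
  std::printf("master=%.6f value=%.6f\n", w.master[0], w.value[0]);
  return 0;
}
```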
38 changes: 0 additions & 38 deletions nntrainer/layers/layer_context.h
@@ -474,14 +474,6 @@ class RunLayerContext {
    */
   Tensor &getWeight(unsigned int idx) const;

-  /**
-   * @brief Get the Weight master tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return Tensor& Reference to the weight tensor
-   */
-  Tensor *getWeightMaster(unsigned int idx) const;
-
   /**
    * @brief Get the Weight Gradient tensor object
    *
@@ -509,15 +501,6 @@ class RunLayerContext {
    */
   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;

-  /**
-   * @brief Get the Weight Optimizer Master Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer master variable
-   * @return Tensor& Reference to the weight optimizer tensor
-   */
-  Tensor &getWeightOptMasterVar(unsigned int idx, unsigned int jdx) const;
-
   /**
    * @brief Get the Weight name
    *
@@ -628,11 +611,6 @@ class RunLayerContext {
    */
   Tensor &getOutgoingDerivative(unsigned int idx);

-  /**
-   * @brief validate input/output derivatives of the layer
-   */
-  bool validateDerivatives();
-
   /**
    * @brief Get the Tensor object
    *
@@ -762,29 +740,13 @@ class RunLayerContext {
    */
   unsigned int getNumWeightOptVar(unsigned int idx) const;

-  /**
-   * @brief Get the Number of Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return unsigned int Number of the weight optimizer variable
-   */
-  unsigned int getNumWeightOptMasterVar(unsigned int idx) const;
-
   /**
    * @brief Get the number of requested tensors objects
    *
    * @return unsigned int number of requested tensors
    */
   unsigned int getNumTensors() const { return tensors.size(); }

-  /**
-   * @brief Set the Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer variable
-   */
-  void setWeightOptVars(unsigned int idx, std::vector<Tensor *> opts);
-
   /**
    * @brief Set the batch for the run context
    *
27 changes: 4 additions & 23 deletions nntrainer/layers/layer_node.cpp
@@ -16,7 +16,6 @@
 #include <cmath>
 #include <iterator>
 #include <stdexcept>
-#include <tuple>
 #include <utility>

 #include <activation_layer.h>
@@ -466,24 +465,16 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       if (run_context->isGradientLastAccess(i) && getTrainable()) {
         /// @note read optimizer variables
-        auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
         for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
-          if (num_w_opt_m > 0)
-            run_context->getWeightOptMasterVar(i, j).read(file);
-          else
-            run_context->getWeightOptVar(i, j).read(file);
+          run_context->getWeightOptVar(i, j).read(file);
         }
       }
     }
   } else {
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       /// @note shared weights are only be read at the first acecss
       if (run_context->isGradientLastAccess(i)) {
-        auto w = run_context->getWeightMaster(i);
-        if (w)
-          w->read(file);
-        else
-          run_context->getWeight(i).read(file);
+        run_context->getWeight(i).read(file);
       }
     }
   }
@@ -498,13 +489,9 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
       if (run_context->isGradientLastAccess(i) && getTrainable()) {
         // @note save optimizer variables
         if (run_context->weightHasGradient(i)) {
-          auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
           for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
                ++j) {
-            if (num_w_opt_m > 0)
-              run_context->getWeightOptMasterVar(i, j).save(file);
-            else
-              run_context->getWeightOptVar(i, j).save(file);
+            run_context->getWeightOptVar(i, j).save(file);
           }
         }
       }
@@ -513,13 +500,7 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
     // @note shared weights are only be saved at the first access
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       if (run_context->isGradientLastAccess(i)) {
-        if (run_context->getNumWeights()) {
-          auto w = run_context->getWeightMaster(i);
-          if (w)
-            w->save(file);
-          else
-            run_context->getWeight(i).save(file);
-        }
+        run_context->getWeight(i).save(file);
       }
     }
   }
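LayerNode::read and LayerNode::save above stream each weight (and, when opt_var is set, each optimizer variable) through the corresponding tensor's read/save methods. As a rough illustration of what binary (de)serialization of a flat float buffer can look like, with hypothetical helper names rather than nntrainer's Tensor::read and Tensor::save:

```cpp
// Minimal sketch of binary save/read for a flat float buffer; hypothetical
// helper names, not nntrainer's Tensor::read / Tensor::save.
#include <fstream>
#include <vector>

void save_buffer(std::ofstream &file, const std::vector<float> &data) {
  file.write(reinterpret_cast<const char *>(data.data()),
             static_cast<std::streamsize>(data.size() * sizeof(float)));
}

void read_buffer(std::ifstream &file, std::vector<float> &data) {
  // The caller is expected to have sized `data` already; the shape comes
  // from the model description, not from the weight file.
  file.read(reinterpret_cast<char *>(data.data()),
            static_cast<std::streamsize>(data.size() * sizeof(float)));
}

int main() {
  std::vector<float> w = {0.5f, -1.25f, 3.0f};
  {
    std::ofstream out("weights.bin", std::ios::binary);
    save_buffer(out, w);
  }
  std::vector<float> loaded(w.size());
  std::ifstream in("weights.bin", std::ios::binary);
  read_buffer(in, loaded);
  return loaded == w ? 0 : 1; // round trip should reproduce the weights
}
```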
5 changes: 0 additions & 5 deletions nntrainer/layers/layer_node.h
@@ -900,11 +900,6 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
    */
   bool needsCalcGradient() { return needs_calc_gradient; }

-  /**
-   * @brief Set loss scale factor
-   */
-  void setLossScale(float scale) { layer->setLossScale(scale); }
-
 private:
   /**
    * @brief Get the Input Layers object
3 changes: 0 additions & 3 deletions nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
@@ -61,9 +61,6 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

   y.apply<float>(ActiFunc::sigmoid<float>, ret_derivative);
-
-  applyLossScale(ret_derivative);
-
   ret_derivative.subtract_i(y2);
   if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "
36 changes: 7 additions & 29 deletions nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
@@ -30,14 +30,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

   // fill the output
-  auto out_type = hidden_.getDataType();
-  if (out_type == ml::train::TensorDim::DataType::FP32) {
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<float>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
-    }
+  auto dataType = y.getDataType();
+  if (dataType == ml::train::TensorDim::DataType::FP32) {
+    hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);

     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -48,14 +43,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
       // update the loss value
       LossLayer::updateLoss(context, l);
     }
-  } else if (out_type == ml::train::TensorDim::DataType::FP16) {
+  } else if (dataType == ml::train::TensorDim::DataType::FP16) {
 #ifdef ENABLE_FP16
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<_FP16>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
-    }
+    hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);

     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -78,8 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

   auto dataType = y.getDataType();
-
-  Tensor ret(y.getDim());
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
@@ -94,18 +83,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   /// operation
   // TODO: verify y and ret_derivative must not be same as loss layer is not
   // working in-place
-  if (ret.getDataType() != y2.getDataType()) {
-    ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
-  } else {
-    ret.subtract(y2, ret_derivative);
-  }
-
-  /**
-   * loss scale is applied for mixed precision
-   * every loss layers need to specify this applying code.
-   */
-  applyLossScale(ret_derivative);
-
+  ret.subtract(y2, ret_derivative);
   if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
                              "Error when calculating loss");
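For reference, the derivative computed in calcDerivative above follows the standard softmax-plus-cross-entropy identity: with p = softmax(y) and label t, the gradient with respect to the input is (p - t) / batch. A standalone sketch of that formula, using plain vectors and hypothetical names instead of nntrainer tensors:

```cpp
// Standalone sketch of the softmax cross-entropy derivative (p - t) / batch;
// hypothetical names, plain vectors instead of nntrainer Tensors.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

std::vector<float> softmax(const std::vector<float> &logits) {
  float max_logit = *std::max_element(logits.begin(), logits.end());
  std::vector<float> p(logits.size());
  float sum = 0.0f;
  for (std::size_t i = 0; i < logits.size(); ++i) {
    p[i] = std::exp(logits[i] - max_logit); // subtract max for stability
    sum += p[i];
  }
  for (auto &v : p)
    v /= sum;
  return p;
}

int main() {
  const std::size_t batch = 1;
  std::vector<float> logits = {2.0f, 0.5f, -1.0f};
  std::vector<float> label = {1.0f, 0.0f, 0.0f}; // one-hot target

  std::vector<float> p = softmax(logits);
  std::vector<float> deriv(p.size());
  for (std::size_t i = 0; i < p.size(); ++i)
    deriv[i] = (p[i] - label[i]) / static_cast<float>(batch); // dL/dlogits

  for (std::size_t i = 0; i < deriv.size(); ++i)
    std::printf("deriv[%zu] = %f\n", i, deriv[i]);
  return 0;
}
```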
3 changes: 0 additions & 3 deletions nntrainer/layers/loss/loss_layer.cpp
@@ -15,9 +15,6 @@
 #include <loss_layer.h>

 namespace nntrainer {
-
-LossLayer::LossLayer() : Layer(), loss_scale(0.0f) {}
-
 void LossLayer::finalize(InitLayerContext &context) {
   std::vector<TensorDim> input_dim = context.getInputDimensions();
   std::vector<TensorDim> output_dim = input_dim;
13 changes: 0 additions & 13 deletions nntrainer/layers/loss/loss_layer.h
@@ -27,11 +27,6 @@ namespace nntrainer {
  */
 class LossLayer : public Layer {
 public:
-  /**
-   * @brief Constructor of Loss Layer
-   */
-  LossLayer();
-
   /**
    * @brief Destructor of Loss Layer
   */
@@ -52,19 +47,11 @@ class LossLayer : public Layer {
    */
   virtual bool supportBackwarding() const override { return true; }

-  /**
-   * @brief Set loss scale factor
-   */
-  virtual void setLossScale(float scale) override { loss_scale = scale; }
-
 private:
   /**
    * @copydoc Layer::requireLabel()
    */
   bool requireLabel() const override { return true; }
-
-  float loss_scale; /**< loss scale factor */
-
 protected:
   /**
    * @brief update loss
4 changes: 1 addition & 3 deletions nntrainer/layers/loss/meson.build
@@ -7,9 +7,7 @@ loss_layer_sources = [
   'constant_derivative_loss_layer.cpp'
 ]

-loss_layer_headers = [
-  'loss_layer.h'
-]
+loss_layer_headers = []

 loss_layer_deps = []

