Add layer norm op (#101)

* add layernorm op
* support layernorm
* code format
Oneflow-Inc · Oct 19, 2022 · 74a2b70 · 74a2b70
1 parent 3a3e84f
commit 74a2b70
Show file tree

Hide file tree

Showing 2 changed files with 83 additions and 0 deletions.
diff --git a/examples/oneflow2onnx/nodes/GPU/test_layer_norm.py b/examples/oneflow2onnx/nodes/GPU/test_layer_norm.py
@@ -0,0 +1,53 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import tempfile
+import oneflow as flow
+from oneflow_onnx.oneflow2onnx.util import convert_to_onnx_and_check
+
+
+class LayerNorm(flow.nn.Module):
+    """Minimal module wrapping a single ``flow.nn.LayerNorm`` layer.
+
+    The layer is built with shape ``[5, 10, 10]`` and ``elementwise_affine=False``.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.norm = flow.nn.LayerNorm([5, 10, 10], elementwise_affine=False)
+
+    def forward(self, x: flow.Tensor) -> flow.Tensor:
+        """Return the result of applying the wrapped layer-norm to ``x``."""
+        return self.norm(x)
+
+
+# Shared module instance used by the graph below; it is moved to the "cuda"
+# device and put into eval mode as soon as this file is loaded.
+layernorm = LayerNorm().to("cuda")
+layernorm.eval()
+
+
+class LayerNormOpGraph(flow.nn.Graph):
+    """Graph wrapper around the module-level ``layernorm`` instance."""
+
+    def __init__(self):
+        super().__init__()
+        self.m = layernorm
+
+    def build(self, x):
+        """Run the wrapped module on ``x`` and return its output."""
+        return self.m(x)
+
+
+def test_layernorm():
+    """Compile the layer-norm graph on CUDA and check its ONNX export at opset 9.
+
+    The graph is compiled with a random ``(20, 5, 10, 10)`` tensor placed on the
+    "cuda" device and then handed to ``convert_to_onnx_and_check`` with
+    ``device="gpu"``.
+    """
+    layernorm_graph = LayerNormOpGraph()
+    layernorm_graph._compile(flow.randn(20, 5, 10, 10).to("cuda"))
+
+    # Export into a private scratch directory instead of the shared, hard-coded
+    # "/tmp": this works on platforms without /tmp, avoids clashes between
+    # concurrently running tests, and cleans up the exported files afterwards.
+    with tempfile.TemporaryDirectory() as onnx_model_dir:
+        convert_to_onnx_and_check(layernorm_graph, onnx_model_path=onnx_model_dir, opset=9, device="gpu")
+
+
+# Runs the check immediately whenever this file is executed or imported.
+test_layernorm()
diff --git a/oneflow_onnx/oneflow2onnx/handlers/nn.py b/oneflow_onnx/oneflow2onnx/handlers/nn.py
@@ -532,3 +532,33 @@ def Version_13(cls, ctx, node, **kwargs):
             sizes.append(node_sizes[1])
             sizes_node = ctx.MakeConst(oneflow._oneflow_internal.UniqueStr("sizes"), np.array(sizes).astype(np.int64),)
             node.input_tensor_names.append(sizes_node.output_tensor_names[0])
+
+
+@flow_op(["layer_norm"])
+class LayerNorm:
+    """Lower the OneFlow ``layer_norm`` op to a subgraph of primitive ONNX ops."""
+
+    @classmethod
+    def Version_9(cls, ctx, node, **kwargs):
+        """Replace ``node`` with ReduceMean/Sub/Pow/Add/Sqrt/Div (and optional Mul/Add) nodes.
+
+        The emitted subgraph computes
+        ``(x - mean(x)) / sqrt(mean((x - mean(x)) ** 2) + epsilon)``, reducing over
+        the axes from ``begin_norm_axis`` through the last axis. When the ``scale``
+        attribute is set the result is multiplied by the node's second input; when
+        ``center`` is set the node's third input is added.
+
+        Args:
+            ctx: Graph being converted; used to read shapes and create/remove nodes.
+            node: The ``layer_norm`` node to replace.
+            **kwargs: Unused.
+        """
+        dtype = node.output_dtypes[0]
+        np_dtype = util.Onnx2NumpyDtype(dtype)
+        x_name = node.input_tensor_names[0]
+        rank = len(ctx.get_shape(x_name))
+        center = node.attrs["center"]  # bool
+        scale = node.attrs["scale"]  # bool
+        begin_norm_axis = node.attrs["begin_norm_axis"]  # int
+        epsilon = node.attrs["epsilon"]  # float
+
+        # ONNX only defines negative ReduceMean axes from opset 11 onwards, so for
+        # this opset-9 handler emit non-negative axes. A negative begin_norm_axis
+        # is normalised against the rank first so it selects the intended suffix.
+        if begin_norm_axis < 0:
+            begin_norm_axis += rank
+        axes = list(range(begin_norm_axis, rank))
+        reduce_attr = {"axes": axes, "keepdims": 1}
+
+        def make(op_type, inputs, name, **extra):
+            # Every helper node shares the original node's name scope and dtype.
+            return ctx.MakeNode(op_type, inputs, op_name_scope=node.name, name=name, dtypes=[dtype], **extra)
+
+        def out(made_node):
+            return made_node.output_tensor_names[0]
+
+        two_cast = ctx.MakeConst(oneflow._oneflow_internal.UniqueStr("two"), np.array(2.0, dtype=np_dtype))
+        eps_cast = ctx.MakeConst(oneflow._oneflow_internal.UniqueStr("eps"), np.array(epsilon, dtype=np_dtype))
+
+        mean = make("ReduceMean", [x_name], "mean_1", attr=dict(reduce_attr))
+        numerator = make("Sub", [x_name, out(mean)], "numerator")
+        pow_node = make("Pow", [out(numerator), out(two_cast)], "pow_node")
+        variance = make("ReduceMean", [out(pow_node)], "mean_2", attr=dict(reduce_attr))
+        add_node_1 = make("Add", [out(variance), out(eps_cast)], "add_node_1")
+        denominator = make("Sqrt", [out(add_node_1)], "denominator")
+        normalized = make("Div", [out(numerator), out(denominator)], "normalized")
+
+        # NOTE(review): inputs 1 and 2 are presumably the learned gamma/beta
+        # tensors, combined with the normalized value via ONNX broadcasting -
+        # confirm against the OneFlow layer_norm op definition.
+        if scale:
+            normalized = make("Mul", [out(normalized), node.input_tensor_names[1]], "normalized_scale")
+        if center:
+            normalized = make("Add", [out(normalized), node.input_tensor_names[2]], "normalized_center")
+
+        # Drop the original op, then re-bind its output name to the new subgraph
+        # so downstream consumers keep working unchanged.
+        ctx.RemoveNode(node.name)
+        make("Identity", [out(normalized)], "rdenominator", outputs=[node.output_tensor_names[0]])