From 439bae734f593fa81ec8647b4c75665af69c0c8a Mon Sep 17 00:00:00 2001 From: Arunprakash-A Date: Thu, 25 Jan 2024 22:16:38 +0530 Subject: [PATCH 1/2] compute std in layernorm as in original paper --- AnnotatedTransformer.ipynb | 2 +- the_annotated_transformer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AnnotatedTransformer.ipynb b/AnnotatedTransformer.ipynb index 0f7da7d..eafeec6 100644 --- a/AnnotatedTransformer.ipynb +++ b/AnnotatedTransformer.ipynb @@ -528,7 +528,7 @@ "\n", " def forward(self, x):\n", " mean = x.mean(-1, keepdim=True)\n", - " std = x.std(-1, keepdim=True)\n", + " std = x.std(-1,correction=0,keepdim=True)\n", " return self.a_2 * (x - mean) / (std + self.eps) + self.b_2" ] }, diff --git a/the_annotated_transformer.py b/the_annotated_transformer.py index 4aa1d46..fa2a5b6 100644 --- a/the_annotated_transformer.py +++ b/the_annotated_transformer.py @@ -323,7 +323,7 @@ def __init__(self, features, eps=1e-6): def forward(self, x): mean = x.mean(-1, keepdim=True) - std = x.std(-1, keepdim=True) + std = x.std(-1,correction=0,keepdim=True) return self.a_2 * (x - mean) / (std + self.eps) + self.b_2 From 515560c29a623218f417aed76042f417c4c07860 Mon Sep 17 00:00:00 2001 From: Arunprakash-A Date: Thu, 25 Jan 2024 22:35:24 +0530 Subject: [PATCH 2/2] computed biased std in layernorm as in the original paper --- AnnotatedTransformer.ipynb | 2 +- the_annotated_transformer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AnnotatedTransformer.ipynb b/AnnotatedTransformer.ipynb index eafeec6..3478c4b 100644 --- a/AnnotatedTransformer.ipynb +++ b/AnnotatedTransformer.ipynb @@ -528,7 +528,7 @@ "\n", " def forward(self, x):\n", " mean = x.mean(-1, keepdim=True)\n", - " std = x.std(-1,correction=0,keepdim=True)\n", + " std = x.std(-1,unbiased=False,keepdim=True)\n", " return self.a_2 * (x - mean) / (std + self.eps) + self.b_2" ] }, diff --git a/the_annotated_transformer.py b/the_annotated_transformer.py index fa2a5b6..9a76925 100644 --- a/the_annotated_transformer.py +++ b/the_annotated_transformer.py @@ -323,7 +323,7 @@ def __init__(self, features, eps=1e-6): def forward(self, x): mean = x.mean(-1, keepdim=True) - std = x.std(-1,correction=0,keepdim=True) + std = x.std(-1,unbiased=False,keepdim=True) return self.a_2 * (x - mean) / (std + self.eps) + self.b_2