Have mono handle the vector as APIs that grow or shrink the vector type #104445

Closed · wants to merge 12 commits · Changes from 11 commits
@@ -372,16 +372,6 @@ public static Vector128<T> operator >>>(Vector128<T> value, int shiftCount)
/// <exception cref="NotSupportedException">The type of the vector (<typeparamref name="T" />) is not supported.</exception>
public override bool Equals([NotNullWhen(true)] object? obj) => (obj is Vector128<T> other) && Equals(other);

// Account for floating-point equality around NaN
// This is in a separate method so it can be optimized by the mono interpreter/jiterpreter
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool EqualsFloatingPoint(Vector128<T> lhs, Vector128<T> rhs)
{
Vector128<T> result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs));
return result.AsInt32() == Vector128<int>.AllBitsSet;
}

/// <summary>Determines whether the specified <see cref="Vector128{T}" /> is equal to the current instance.</summary>
/// <param name="other">The <see cref="Vector128{T}" /> to compare with the current instance.</param>
/// <returns><c>true</c> if <paramref name="other" /> is equal to the current instance; otherwise, <c>false</c>.</returns>
@@ -395,7 +385,8 @@ public bool Equals(Vector128<T> other)
{
if ((typeof(T) == typeof(double)) || (typeof(T) == typeof(float)))
{
return EqualsFloatingPoint(this, other);
Vector128<T> result = Vector128.Equals(this, other) | ~(Vector128.Equals(this, this) | Vector128.Equals(other, other));
return result.AsInt32() == Vector128<int>.AllBitsSet;
}
else
{
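
Both the removed EqualsFloatingPoint helper and the inlined replacement rest on the same lane-wise identity: Equals(a, b) | ~(Equals(a, a) | Equals(b, b)) is all-ones when two lanes hold equal values or when both hold NaN. A minimal scalar sketch of that identity, written in plain C purely for illustration (the helper name below is made up and does not appear in the PR):

    #include <math.h>
    #include <stdbool.h>

    /* Per-lane view of (a == b) | ~((a == a) | (b == b)):
       the second term is true only when a != a AND b != b, i.e. when both lanes
       are NaN, so Equals() treats NaN as equal to NaN, unlike an ordinary compare. */
    static bool lane_equals (float a, float b)
    {
        return (a == b) || (isnan (a) && isnan (b));
    }
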
27 changes: 0 additions & 27 deletions src/mono/browser/runtime/jiterpreter-trace-generator.ts
@@ -3748,33 +3748,6 @@ function emit_simd_3 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrin
builder.appendU8(WasmOpcode.i32_eqz);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
return true;
case SimdIntrinsic3.V128_R4_FLOAT_EQUALITY:
case SimdIntrinsic3.V128_R8_FLOAT_EQUALITY: {
/*
Vector128<T> result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs));
return result.AsInt32() == Vector128<int>.AllBitsSet;
*/
const isR8 = index === SimdIntrinsic3.V128_R8_FLOAT_EQUALITY,
eqOpcode = isR8 ? WasmSimdOpcode.f64x2_eq : WasmSimdOpcode.f32x4_eq;
builder.local("pLocals");
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
builder.local("math_lhs128", WasmOpcode.tee_local);
append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
builder.local("math_rhs128", WasmOpcode.tee_local);
builder.appendSimd(eqOpcode);
builder.local("math_lhs128");
builder.local("math_lhs128");
builder.appendSimd(eqOpcode);
builder.local("math_rhs128");
builder.local("math_rhs128");
builder.appendSimd(eqOpcode);
builder.appendSimd(WasmSimdOpcode.v128_or);
builder.appendSimd(WasmSimdOpcode.v128_not);
builder.appendSimd(WasmSimdOpcode.v128_or);
builder.appendSimd(isR8 ? WasmSimdOpcode.i64x2_all_true : WasmSimdOpcode.i32x4_all_true);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
return true;
}
case SimdIntrinsic3.V128_I1_SHUFFLE: {
// Detect a constant indices vector and turn it into a const. This allows
// v8 to use a more optimized implementation of the swizzle opcode
2 changes: 2 additions & 0 deletions src/mono/mono/mini/interp/interp-internals.h
@@ -31,6 +31,8 @@
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)
#define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT)
#define SIZEOF_V128 16
#define SIZEOF_V2 8
#define SIZEOF_V3 12

#define INTERP_STACK_SIZE (1024*1024)
#define INTERP_REDZONE_SIZE (8*1024)
13 changes: 11 additions & 2 deletions src/mono/mono/mini/interp/interp-simd-intrins.def
@@ -58,8 +58,11 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_o
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, -1)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY, interp_v128_r4_float_equality, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY, interp_v128_r8_float_equality, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4, interp_v128_instance_equals_r4, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4, interp_v2_instance_equals_r4, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4, interp_v3_instance_equals_r4, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8, interp_v128_instance_equals_r8, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITWISE, interp_v128_instance_equals_bitwise, -1)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81)

@@ -71,6 +74,12 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_DIVISION, interp_v128_r4_op_division, 231)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_BITCAST, interp_v128_bitcast, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V2, interp_v128_to_v2, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V3, interp_v128_to_v3, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V128, interp_v2_to_v128, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V3, interp_v2_to_v3, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V128, interp_v3_to_v128, -1)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V2, interp_v3_to_v2, -1)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation, 97)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation, 129)
105 changes: 100 additions & 5 deletions src/mono/mono/mini/interp/interp-simd.c
@@ -6,6 +6,8 @@
#include <wasm_simd128.h>
#endif

#include <mono/utils/mono-math.h>

#ifdef INTERP_ENABLE_SIMD

gboolean interp_simd_enabled = TRUE;
@@ -35,6 +37,65 @@ interp_v128_bitcast (gpointer res, gpointer v1)
*(v128_i1*)res = *(v128_i1*)v1;
}

// Vector2 AsVector2(Vector128<float> v1)
static void
interp_v128_to_v2 (gpointer res, gpointer v1)
{
memcpy (res, v1, SIZEOF_V2);
}

// Vector3 AsVector3(Vector128<float> v1)
static void
interp_v128_to_v3 (gpointer res, gpointer v1)
{
memcpy (res, v1, SIZEOF_V3);
}

// Vector128<float> AsVector128(Vector2 v1)
static void
interp_v2_to_v128 (gpointer res, gpointer v1)
{
float *res_typed = (float*)res;
float *v1_typed = (float*)v1;

res_typed [0] = v1_typed [0];
res_typed [1] = v1_typed [1];
res_typed [2] = 0;
res_typed [3] = 0;
}

// Vector3 AsVector3(Vector2 v1)
static void
interp_v2_to_v3 (gpointer res, gpointer v1)
{
float *res_typed = (float*)res;
float *v1_typed = (float*)v1;

res_typed [0] = v1_typed [0];
res_typed [1] = v1_typed [1];
res_typed [2] = 0;
Contributor:

There might be a problem here - if the v3 is in a stack local, it's 16 bytes wide and you might need to zero [3]. I'm not sure whether res can be a non-stack address though...

Member Author:

I had code earlier that tried to always handle it as 16-bytes, but it didn't help.

Notably, I wouldn't expect the need to explicitly zero that space anyways as it would be considered padding space and should be ignored when loaded. Otherwise it would risk corrupting other operations, like Sum.

}
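
To make the review discussion above concrete: a sketch (mine, not from the PR) of a Vector3 sitting in a 16-byte, SIMD-aligned interpreter stack slot. Bytes 12-15 are padding that element-wise Vector3 operations are expected to ignore, which is the author's argument for not zeroing the fourth lane here; the slot size and names below are assumptions for illustration only.

    /* Assumed 16-byte slot: lanes[0..2] hold x, y, z; lanes[3] overlaps padding. */
    typedef union {
        unsigned char bytes [16];
        float         lanes [4];
    } v3_slot;

    /* A Sum over a Vector3 only reads lanes 0..2, so whatever happens to sit in
       the padding bytes never influences the result. */
    static float v3_sum (const v3_slot *v)
    {
        return v->lanes [0] + v->lanes [1] + v->lanes [2];
    }
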

// Vector128<float> AsVector128(Vector3 v1)
static void
interp_v3_to_v128 (gpointer res, gpointer v1)
{
float *res_typed = (float*)res;
float *v1_typed = (float*)v1;

res_typed [0] = v1_typed [0];
res_typed [1] = v1_typed [1];
res_typed [2] = v1_typed [2];
res_typed [3] = 0;
}

// Vector2 AsVector2(Vector3 v1)
static void
interp_v3_to_v2 (gpointer res, gpointer v1)
{
memcpy (res, v1, SIZEOF_V2);
}

// op_Addition
static void
interp_v128_i1_op_addition (gpointer res, gpointer v1, gpointer v2)
@@ -132,29 +193,63 @@ interp_v128_op_bitwise_inequality (gpointer res, gpointer v1, gpointer v2)
*(gint32*)res = 1;
}

// Vector128<float>EqualsFloatingPoint
// Vector128<float>.Equals
static void
interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2)
interp_v128_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2)
{
v128_r4 v1_cast = *(v128_r4*)v1;
v128_r4 v1_cast = **(v128_r4**)v1;
v128_r4 v2_cast = *(v128_r4*)v2;
v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
memset (&v1_cast, 0xff, SIZEOF_V128);

*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0;
}
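
A note on the double dereference above (my reading of the change, not stated in the diff): instance Equals is a struct method, so its first interpreter argument is the `this` reference, i.e. a pointer to the vector, while the second operand arrives by value. A tiny sketch of the two access patterns, with an assumed typedef mirroring the runtime's vector type:

    /* Assumed to mirror mono's v128_r4: four packed floats (GCC/clang vector extension). */
    typedef float v128_r4 __attribute__ ((vector_size (16)));

    /* v1 holds a pointer to the vector (`this` passed by reference);
       v2 holds the vector value itself (the by-value operand). */
    static void load_equals_operands (void *v1, void *v2, v128_r4 *self, v128_r4 *other)
    {
        *self  = **(v128_r4**)v1;
        *other = *(v128_r4*)v2;
    }
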

// Vector2.Equals
static void
interp_v2_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2)
{
v128_r4 v1_cast;
interp_v2_to_v128 (&v1_cast, v1);
v128_r4 v2_cast = *(v128_r4*)v2;
v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
memset (&v1_cast, 0xff, SIZEOF_V2);

*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V2) == 0;
}

// Vector3.Equals
static void
interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2)
interp_v3_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2)
{
v128_r8 v1_cast = *(v128_r8*)v1;
v128_r4 v1_cast;
interp_v3_to_v128 (&v1_cast, v1);
v128_r4 v2_cast = *(v128_r4*)v2;
v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
memset (&v1_cast, 0xff, SIZEOF_V3);

*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V3) == 0;
}

// Vector128<double>.Equals
static void
interp_v128_instance_equals_r8 (gpointer res, gpointer v1, gpointer v2)
{
v128_r8 v1_cast = **(v128_r8**)v1;
v128_r8 v2_cast = *(v128_r8*)v2;
v128_r8 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
memset (&v1_cast, 0xff, SIZEOF_V128);

*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0;
}

// Vector128<T>.Equals, for integer T
static void
interp_v128_instance_equals_bitwise (gpointer res, gpointer v1, gpointer v2)
{
interp_v128_op_bitwise_equality(res, *(v128_i1**)v1, v2);
}

// op_Multiply
static void
interp_v128_i1_op_multiply (gpointer res, gpointer v1, gpointer v2)
Expand Down
6 changes: 4 additions & 2 deletions src/mono/mono/mini/interp/simd-methods.def
@@ -39,16 +39,18 @@ SIMD_METHOD(AsUInt16)
SIMD_METHOD(AsUInt32)
SIMD_METHOD(AsUInt64)
SIMD_METHOD(AsVector)
SIMD_METHOD(AsVector4)
SIMD_METHOD(AsVector128)
SIMD_METHOD(AsVector128Unsafe)
SIMD_METHOD(AsVector2)
SIMD_METHOD(AsVector3)
SIMD_METHOD(AsVector4)
SIMD_METHOD(ConditionalSelect)
SIMD_METHOD(Create)
SIMD_METHOD(CreateScalar)
SIMD_METHOD(CreateScalarUnsafe)

SIMD_METHOD(Equals)
SIMD_METHOD(EqualsAny)
SIMD_METHOD(EqualsFloatingPoint)
SIMD_METHOD(ExtractMostSignificantBits)
SIMD_METHOD(GreaterThan)
SIMD_METHOD(LessThan)