diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs
index 23891dcc2b67d..11bc0310203b2 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs
@@ -372,16 +372,6 @@ public static Vector128<T> operator >>>(Vector128<T> value, int shiftCount)
         /// <exception cref="NotSupportedException">The type of the vector (<typeparamref name="T" />) is not supported.</exception>
         public override bool Equals([NotNullWhen(true)] object? obj) => (obj is Vector128<T> other) && Equals(other);
 
-        // Account for floating-point equality around NaN
-        // This is in a separate method so it can be optimized by the mono interpreter/jiterpreter
-        [Intrinsic]
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static bool EqualsFloatingPoint(Vector128<T> lhs, Vector128<T> rhs)
-        {
-            Vector128<T> result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs));
-            return result.AsInt32() == Vector128<int>.AllBitsSet;
-        }
-
         /// <summary>Determines whether the specified <see cref="Vector128{T}" /> is equal to the current instance.</summary>
         /// <param name="other">The <see cref="Vector128{T}" /> to compare with the current instance.</param>
         /// <returns><c>true</c> if <paramref name="other" /> is equal to the current instance; otherwise, <c>false</c>.</returns>
@@ -395,7 +385,8 @@ public bool Equals(Vector128<T> other)
             {
                 if ((typeof(T) == typeof(double)) || (typeof(T) == typeof(float)))
                 {
-                    return EqualsFloatingPoint(this, other);
+                    Vector128<T> result = Vector128.Equals(this, other) | ~(Vector128.Equals(this, this) | Vector128.Equals(other, other));
+                    return result.AsInt32() == Vector128<int>.AllBitsSet;
                 }
                 else
                 {
diff --git a/src/mono/browser/runtime/jiterpreter-trace-generator.ts b/src/mono/browser/runtime/jiterpreter-trace-generator.ts
index e81c9b6683cbc..b4f3fb5fb353f 100644
--- a/src/mono/browser/runtime/jiterpreter-trace-generator.ts
+++ b/src/mono/browser/runtime/jiterpreter-trace-generator.ts
@@ -3748,33 +3748,6 @@ function emit_simd_3 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrin
                 builder.appendU8(WasmOpcode.i32_eqz);
             append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
             return true;
-        case SimdIntrinsic3.V128_R4_FLOAT_EQUALITY:
-        case SimdIntrinsic3.V128_R8_FLOAT_EQUALITY: {
-            /*
-            Vector128<T> result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs));
-            return result.AsInt32() == Vector128<int>.AllBitsSet;
-            */
-            const isR8 = index === SimdIntrinsic3.V128_R8_FLOAT_EQUALITY,
-                eqOpcode = isR8 ? WasmSimdOpcode.f64x2_eq : WasmSimdOpcode.f32x4_eq;
-            builder.local("pLocals");
-            append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
-            builder.local("math_lhs128", WasmOpcode.tee_local);
-            append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
-            builder.local("math_rhs128", WasmOpcode.tee_local);
-            builder.appendSimd(eqOpcode);
-            builder.local("math_lhs128");
-            builder.local("math_lhs128");
-            builder.appendSimd(eqOpcode);
-            builder.local("math_rhs128");
-            builder.local("math_rhs128");
-            builder.appendSimd(eqOpcode);
-            builder.appendSimd(WasmSimdOpcode.v128_or);
-            builder.appendSimd(WasmSimdOpcode.v128_not);
-            builder.appendSimd(WasmSimdOpcode.v128_or);
-            builder.appendSimd(isR8 ? WasmSimdOpcode.i64x2_all_true : WasmSimdOpcode.i32x4_all_true);
-            append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
-            return true;
-        }
         case SimdIntrinsic3.V128_I1_SHUFFLE: {
             // Detect a constant indices vector and turn it into a const. This allows
             //  v8 to use a more optimized implementation of the swizzle opcode
diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h
index 5fe04bf314c8c..ef230fb341728 100644
--- a/src/mono/mono/mini/interp/interp-internals.h
+++ b/src/mono/mono/mini/interp/interp-internals.h
@@ -31,6 +31,8 @@
 #define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)
 #define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT)
 #define SIZEOF_V128 16
+#define SIZEOF_V2 8
+#define SIZEOF_V3 12
 
 #define INTERP_STACK_SIZE (1024*1024)
 #define INTERP_REDZONE_SIZE (8*1024)
diff --git a/src/mono/mono/mini/interp/interp-simd-intrins.def b/src/mono/mono/mini/interp/interp-simd-intrins.def
index 05f32f7e0f2ee..133094a51cbc9 100644
--- a/src/mono/mono/mini/interp/interp-simd-intrins.def
+++ b/src/mono/mono/mini/interp/interp-simd-intrins.def
@@ -58,8 +58,11 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_o
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, -1)
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, -1)
 
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY, interp_v128_r4_float_equality, -1)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY, interp_v128_r8_float_equality, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4, interp_v128_instance_equals_r4, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4, interp_v2_instance_equals_r4, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4, interp_v3_instance_equals_r4, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8, interp_v128_instance_equals_r8, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITWISE, interp_v128_instance_equals_bitwise, -1)
 
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81)
 
@@ -71,6 +74,12 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_
 INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_DIVISION, interp_v128_r4_op_division, 231)
 
 INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_BITCAST, interp_v128_bitcast, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V2, interp_v128_to_v2, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V3, interp_v128_to_v3, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V128, interp_v2_to_v128, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V3, interp_v2_to_v3, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V128, interp_v3_to_v128, -1)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V2, interp_v3_to_v2, -1)
 
 INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation, 97)
 INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation, 129)
diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c
index 4b3ca913c6e5c..2e1feae88bbe1 100644
--- a/src/mono/mono/mini/interp/interp-simd.c
+++ b/src/mono/mono/mini/interp/interp-simd.c
@@ -6,6 +6,8 @@
 #include <wasm_simd128.h>
 #endif
 
+#include <mono/utils/mono-math.h>
+
 #ifdef INTERP_ENABLE_SIMD
 
 gboolean interp_simd_enabled = TRUE;
@@ -35,6 +37,65 @@ interp_v128_bitcast (gpointer res, gpointer v1)
 	*(v128_i1*)res = *(v128_i1*)v1;
 }
 
+// Vector2 AsVector2(Vector128<float> v1): copies the first SIZEOF_V2 bytes of v1 into res
+static void
+interp_v128_to_v2 (gpointer res, gpointer v1)
+{
+	memcpy (res, v1, SIZEOF_V2);
+}
+
+// Vector3 AsVector3(Vector128<float> v1): copies the first SIZEOF_V3 bytes of v1 into res
+static void
+interp_v128_to_v3 (gpointer res, gpointer v1)
+{
+	memcpy (res, v1, SIZEOF_V3);
+}
+
+// Vector128<float> AsVector128(Vector2 v1): copies the two floats of v1 and zeroes elements 2 and 3 of res
+static void
+interp_v2_to_v128 (gpointer res, gpointer v1)
+{
+	float *res_typed = (float*)res;
+	float *v1_typed = (float*)v1;
+
+	res_typed [0] = v1_typed [0];
+	res_typed [1] = v1_typed [1];
+	res_typed [2] = 0;
+	res_typed [3] = 0;
+}
+
+// Vector3 AsVector3(Vector2 v1): copies the two floats of v1 and zeroes element 2 of res
+static void
+interp_v2_to_v3 (gpointer res, gpointer v1)
+{
+	float *res_typed = (float*)res;
+	float *v1_typed = (float*)v1;
+
+	res_typed [0] = v1_typed [0];
+	res_typed [1] = v1_typed [1];
+	res_typed [2] = 0;
+}
+
+// Vector128<float> AsVector128(Vector3 v1): copies the three floats of v1 and zeroes element 3 of res
+static void
+interp_v3_to_v128 (gpointer res, gpointer v1)
+{
+	float *res_typed = (float*)res;
+	float *v1_typed = (float*)v1;
+
+	res_typed [0] = v1_typed [0];
+	res_typed [1] = v1_typed [1];
+	res_typed [2] = v1_typed [2];
+	res_typed [3] = 0;
+}
+
+// Vector2 AsVector2(Vector3 v1): copies the first SIZEOF_V2 bytes of v1 into res
+static void
+interp_v3_to_v2 (gpointer res, gpointer v1)
+{
+	memcpy (res, v1, SIZEOF_V2);
+}
+
 // op_Addition
 static void
 interp_v128_i1_op_addition (gpointer res, gpointer v1, gpointer v2)
@@ -132,11 +193,11 @@ interp_v128_op_bitwise_inequality (gpointer res, gpointer v1, gpointer v2)
 		*(gint32*)res = 1;
 }
 
-// Vector128<float>EqualsFloatingPoint
+// Vector128<float>.Equals
 static void
-interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2)
+interp_v128_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2)
 {
-	v128_r4 v1_cast = *(v128_r4*)v1;
+	v128_r4 v1_cast = **(v128_r4**)v1;
 	v128_r4 v2_cast = *(v128_r4*)v2;
 	v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
 	memset (&v1_cast, 0xff, SIZEOF_V128);
@@ -144,10 +205,37 @@ interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2)
 	*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0;
 }
 
+// Vector2.Equals: v1 is the slot holding the 'this' pointer, v2 is the Vector2 argument; lanes that are NaN on both sides compare equal
+static void
+interp_v2_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2)
+{
+	v128_r4 v1_cast, v2_cast;
+	interp_v2_to_v128 (&v1_cast, *(gpointer*)v1); // dereference 'this', as the V128 instance equals helpers do
+	interp_v2_to_v128 (&v2_cast, v2); // widen instead of loading 16 bytes from an 8 byte Vector2
+	v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
+	memset (&v1_cast, 0xff, SIZEOF_V2);
+
+	*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V2) == 0; // only the two Vector2 lanes are compared
+}
+
+// Vector3.Equals: v1 is the slot holding the 'this' pointer, v2 is the Vector3 argument; lanes that are NaN on both sides compare equal
 static void
-interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2)
+interp_v3_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2)
 {
-	v128_r8 v1_cast = *(v128_r8*)v1;
+	v128_r4 v1_cast, v2_cast;
+	interp_v3_to_v128 (&v1_cast, *(gpointer*)v1); // dereference 'this', as the V128 instance equals helpers do
+	interp_v3_to_v128 (&v2_cast, v2); // widen instead of loading 16 bytes from a 12 byte Vector3
+	v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
+	memset (&v1_cast, 0xff, SIZEOF_V3);
+
+	*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V3) == 0; // only the three Vector3 lanes are compared
+}
+
+// Vector128<double>.Equals
+static void
+interp_v128_instance_equals_r8 (gpointer res, gpointer v1, gpointer v2)
+{
+	v128_r8 v1_cast = **(v128_r8**)v1;
 	v128_r8 v2_cast = *(v128_r8*)v2;
 	v128_r8 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
 	memset (&v1_cast, 0xff, SIZEOF_V128);
@@ -155,6 +243,13 @@ interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2)
 	*(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0;
 }
 
+// Vector128<T>.Equals, for integer T: v1 holds the 'this' pointer, so it is dereferenced before the bitwise compare
+static void
+interp_v128_instance_equals_bitwise (gpointer res, gpointer v1, gpointer v2)
+{
+	interp_v128_op_bitwise_equality(res, *(v128_i1**)v1, v2);
+}
+
 // op_Multiply
 static void
 interp_v128_i1_op_multiply (gpointer res, gpointer v1, gpointer v2)
diff --git a/src/mono/mono/mini/interp/simd-methods.def b/src/mono/mono/mini/interp/simd-methods.def
index a919c585114ed..0f494f4ef6a50 100644
--- a/src/mono/mono/mini/interp/simd-methods.def
+++ b/src/mono/mono/mini/interp/simd-methods.def
@@ -39,8 +39,11 @@ SIMD_METHOD(AsUInt16)
 SIMD_METHOD(AsUInt32)
 SIMD_METHOD(AsUInt64)
 SIMD_METHOD(AsVector)
-SIMD_METHOD(AsVector4)
 SIMD_METHOD(AsVector128)
+SIMD_METHOD(AsVector128Unsafe)
+SIMD_METHOD(AsVector2)
+SIMD_METHOD(AsVector3)
+SIMD_METHOD(AsVector4)
 SIMD_METHOD(ConditionalSelect)
 SIMD_METHOD(Create)
 SIMD_METHOD(CreateScalar)
@@ -48,7 +51,6 @@ SIMD_METHOD(CreateScalarUnsafe)
 
 SIMD_METHOD(Equals)
 SIMD_METHOD(EqualsAny)
-SIMD_METHOD(EqualsFloatingPoint)
 SIMD_METHOD(ExtractMostSignificantBits)
 SIMD_METHOD(GreaterThan)
 SIMD_METHOD(LessThan)
diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c
index 2b64baff7f44a..a3bacd757b342 100644
--- a/src/mono/mono/mini/interp/transform-simd.c
+++ b/src/mono/mono/mini/interp/transform-simd.c
@@ -72,8 +72,11 @@ static guint16 sri_vector128_methods [] = {
 	SN_AsUInt32,
 	SN_AsUInt64,
 	SN_AsVector,
-	SN_AsVector4,
 	SN_AsVector128,
+	SN_AsVector128Unsafe,
+	SN_AsVector2,
+	SN_AsVector3,
+	SN_AsVector4,
 	SN_ConditionalSelect,
 	SN_Create,
 	SN_CreateScalar,
@@ -95,7 +98,7 @@ static guint16 sri_vector128_methods [] = {
 };
 
 static guint16 sri_vector128_t_methods [] = {
-	SN_EqualsFloatingPoint,
+	SN_Equals,
 	SN_get_AllBitsSet,
 	SN_get_Count,
 	SN_get_One,
@@ -117,6 +120,7 @@ static guint16 sri_vector128_t_methods [] = {
 };
 
 static guint16 sn_vector_t_methods [] = {
+	SN_Equals,
 	SN_ctor,
 	SN_get_AllBitsSet,
 	SN_get_Count,
@@ -217,13 +221,6 @@ emit_common_simd_operations (TransformData *td, int id, int atype, int vector_si
 				*simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY;
 			}
 			break;
-		case SN_EqualsFloatingPoint:
-			*simd_opcode = MINT_SIMD_INTRINS_P_PP;
-			if (atype == MONO_TYPE_R4)
-				*simd_intrins = INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY;
-			else if (atype == MONO_TYPE_R8)
-				*simd_intrins = INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY;
-			break;
 		case SN_op_ExclusiveOr:
 			*simd_opcode = MINT_SIMD_INTRINS_P_PP;
 			*simd_intrins = INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR;
@@ -367,8 +364,19 @@ is_element_type_primitive (MonoType *vector_type)
 static void
 emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMethodSignature *csignature, int vector_size, gboolean allow_void)
 {
-	td->sp -= csignature->param_count;
-	for (int i = 0; i < csignature->param_count; i++)
+	// The implicit this isn't tracked as part of param_count unless
+	// explicit_this is also set, but we shouldn't encounter such
+	// explicit_this cases for the intrinsics
+
+	guint16 param_count = csignature->param_count;
+	
+	if (csignature->hasthis)
+		param_count++;
+
+	g_assert(!csignature->explicit_this);
+
+	td->sp -= param_count;
+	for (int i = 0; i < param_count; i++)
 		td->last_ins->sregs [i] = td->sp [i].var;
 
 	int ret_mt = mono_mint_type (csignature->ret);
@@ -376,7 +384,6 @@ emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMetho
 		g_assert (allow_void);
 		interp_ins_set_dummy_dreg (td->last_ins, td);
 	} else if (ret_mt == MINT_TYPE_VT) {
-		// For these intrinsics, if we return a VT then it is a V128
 		push_type_vt (td, vector_klass, vector_size);
 		interp_ins_set_dreg (td->last_ins, td->sp [-1].var);
 	} else {
@@ -470,6 +477,9 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 		}
 		case SN_AsVector:
 		case SN_AsVector128:
+		case SN_AsVector128Unsafe:
+		case SN_AsVector2:
+		case SN_AsVector3:
 		case SN_AsVector4: {
 			if (!is_element_type_primitive (csignature->ret) || !is_element_type_primitive (csignature->params [0]))
 				return FALSE;
@@ -480,12 +490,50 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
 			MonoClass *arg_class = mono_class_from_mono_type_internal (csignature->params [0]);
 			int arg_size = mono_class_value_size (arg_class, NULL);
 
+			vector_klass = ret_class;
+			vector_size = ret_size;
+
 			if (arg_size == ret_size) {
 				simd_opcode = MINT_SIMD_INTRINS_P_P;
 				simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITCAST;
 				break;
 			}
-			return FALSE;
+			
+			if ((ret_size != 8) && (ret_size != 12) && (ret_size != 16)) {
+				return FALSE;
+			}
+
+			if ((arg_size != 8) && (arg_size != 12) && (arg_size != 16)) {
+				return FALSE;
+			}
+
+			if (arg_size > ret_size) {
+				simd_opcode = MINT_SIMD_INTRINS_P_P;
+
+				if (ret_size == 8) {
+					if (arg_size == 16) {
+						simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V2;
+					} else if (arg_size == 12) {
+						simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V2;
+					}
+				} else if ((ret_size == 12) && (arg_size == 16)) {
+					simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V3;
+				}
+				break;
+			} else {
+				simd_opcode = MINT_SIMD_INTRINS_P_P;
+
+				if (arg_size == 8) {
+					if (ret_size == 12) {
+						simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V3;
+					} else if (ret_size == 16) {
+						simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V128;
+					}
+				} else if ((arg_size == 12) && (ret_size == 16)) {
+					simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V128;
+				}
+				break;
+			}
 		}
 		case SN_ConditionalSelect:
 			simd_opcode = MINT_SIMD_INTRINS_P_PPP;
@@ -627,8 +675,21 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur
 	if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg))
 		return FALSE;
 
-	if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins))
+	if (id == SN_Equals) {
+		simd_opcode = MINT_SIMD_INTRINS_P_PP;
+
+		if (atype == MONO_TYPE_R4) {
+			simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4;
+		}
+		else if (atype == MONO_TYPE_R8) {
+			simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8;
+		}
+		else {
+			simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITWISE;
+		}
+	} else if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) {
 		goto opcode_added;
+	}
 
 	if (simd_opcode == -1 || simd_intrins == -1)
 		return FALSE;
@@ -687,7 +748,7 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c
 }
 
 static gboolean
-emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
+emit_sn_vector_2_3_4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
 {
 	int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod);
 	if (id == -1)
@@ -700,14 +761,57 @@ emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *cs
 	MonoClass *vector_klass = cmethod->klass;
 
 	MonoTypeEnum atype = MONO_TYPE_R4;
-	int vector_size = SIZEOF_V128;
+	int vector_size = mono_class_value_size (vector_klass, NULL);
 	int arg_size = sizeof (float);
+
+	if ((vector_size != 8) && (vector_size != 12) && (vector_size != 16)) {
+		return FALSE;
+	}
+
 	int scalar_arg = -1;
 	for (int i = 0; i < csignature->param_count; i++) {
 		if (csignature->params [i]->type != MONO_TYPE_GENERICINST)
 			scalar_arg = i;
 	}
 
+	const char *class_name = m_class_get_name (vector_klass);
+	bool isQuaternion = !strcmp (class_name, "Quaternion");
+	bool isPlane = !strcmp (class_name, "Plane");
+
+	if (id == SN_ctor) {
+		if ((vector_size == 8) || (vector_size == 12)) {
+			// FIXME: We should handle creation of Vector2/Vector3
+			return FALSE;
+		} else if (isQuaternion || isPlane) {
+			// FIXME: We should handle creation of Quaternion/Plane
+			return FALSE;
+		}
+	}
+
+	if (isQuaternion) {
+		if ((id == SN_op_Multiply) || (id == SN_op_Division)) {
+			// FIXME: We should handle multiplication and division of Quaternion
+			return FALSE;
+		}
+	}
+
+	if (id == SN_Equals) {
+		simd_opcode = MINT_SIMD_INTRINS_P_PP;
+
+		if (vector_size == 8) {
+			simd_intrins = INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4;
+		} else if (vector_size == 12) {
+			simd_intrins = INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4;
+		} else {
+			simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4;
+		}
+	}
+	
+	if ((vector_size == 8) || (vector_size == 12)) {
+		// FIXME: We should handle other APIs for Vector2/Vector3
+		return FALSE;
+	}
+
 	if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) {
 		goto opcode_added;
 	} else if (id == SN_ctor) {
@@ -1023,8 +1127,8 @@ interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodS
 	} else if (!strcmp (class_ns, "System.Numerics")) {
 		if (!strcmp (class_name, "Vector`1"))
 			return emit_sn_vector_t (td, cmethod, csignature, newobj);
-		else if (!strcmp (class_name, "Vector4"))
-			return emit_sn_vector4 (td, cmethod, csignature, newobj);
+		else if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector3") || !strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
+			return emit_sn_vector_2_3_4 (td, cmethod, csignature, newobj);
 	} else if (!strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) {
 		if (!strcmp (class_name, "PackedSimd"))
 			return emit_sri_packedsimd (td, cmethod, csignature);
diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c
index c6be2c45af7b0..57d56da16593f 100644
--- a/src/mono/mono/mini/mini-llvm.c
+++ b/src/mono/mono/mini/mini-llvm.c
@@ -676,6 +676,10 @@ simd_class_to_llvm_type (EmitContext *ctx, MonoClass *klass)
 	} else {
 		guint32 nelems;
 		MonoTypeEnum type = mini_get_simd_type_info (klass, &nelems);
+		if (nelems == 3) {
+			// Override to 3 elements + zero
+			nelems = 4;
+		}
 		return LLVMVectorType (primitive_type_to_llvm_type (type), nelems);
 	}
 	g_assert_not_reached ();
@@ -8276,6 +8280,11 @@ MONO_RESTORE_WARNING
 		case OP_XCONST: {
 			int ecount;
 			MonoTypeEnum etype = mini_get_simd_type_info (ins->klass, (guint32*)&ecount);
+			
+			if (ecount == 3) {
+				// Override to 3 elements + zero
+				ecount = 4;
+			}
 
 			LLVMTypeRef llvm_type = primitive_type_to_llvm_type (etype);
 			LLVMValueRef vals [64];
diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c
index 395a869ab22be..82c913f285c1d 100644
--- a/src/mono/mono/mini/mini.c
+++ b/src/mono/mono/mini/mini.c
@@ -4599,8 +4599,7 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems)
 		*nelems = 2;
 		return MONO_TYPE_R4;
 	} else if (!strcmp (klass_name, "Vector3")) {
-		// For LLVM SIMD support, Vector3 is treated as a 4-element vector (three elements + zero).
-		*nelems = 4;
+		*nelems = 3;
 		return MONO_TYPE_R4;
 	} else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) {
 		MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0];
diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c
index bcd4ae0639e92..254198fd0c868 100644
--- a/src/mono/mono/mini/simd-intrinsics.c
+++ b/src/mono/mono/mini/simd-intrinsics.c
@@ -653,11 +653,6 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t
 	guint32 nelems;
  	mini_get_simd_type_info (vector_class, &nelems);
 
-	// Override nelems for Vector3, with actual number of elements, instead of treating it as a 4-element vector (three elements + zero).
-	const char *klass_name = m_class_get_name (vector_class);
-	if (!strcmp (klass_name, "Vector3"))
-		nelems = 3;
-
 	element_size = vector_size / nelems;
 	gboolean has_single_element = vector_size == element_size;
 
@@ -1196,6 +1191,9 @@ static guint16 sri_vector_methods [] = {
 	SN_AsUInt64,
 	SN_AsVector,
 	SN_AsVector128,
+	SN_AsVector128Unsafe,
+	SN_AsVector2,
+	SN_AsVector3,
 	SN_AsVector4,
 	SN_BitwiseAnd,
 	SN_BitwiseOr,
@@ -1640,7 +1638,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 	}
 	case SN_AsVector:
 	case SN_AsVector128:
-	case SN_AsVector4: {
+	case SN_AsVector128Unsafe:
+	case SN_AsVector2:
+	case SN_AsVector3:
+	case SN_AsVector4:
+	case SN_AsVector4Unsafe: {
 		if (!is_element_type_primitive (fsig->ret) || !is_element_type_primitive (fsig->params [0]))
 			return NULL;
 
@@ -1650,10 +1652,59 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
 		int arg_size = mono_class_value_size (arg_class, NULL);
 
-		if (arg_size == ret_size)
+		if (arg_size == ret_size) {
 			return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
+		}
 
-		return NULL;
+		if ((ret_size != 8) && (ret_size != 12) && (ret_size != 16)) {
+			return NULL;
+		}
+
+		if ((arg_size != 8) && (arg_size != 12) && (arg_size != 16)) {
+			return NULL;
+		}
+
+		bool isUnsafe = (id == SN_AsVector128Unsafe) || (id == SN_AsVector4Unsafe);
+
+		if (arg_size > ret_size) {
+#ifdef TARGET_ARM64
+			if (ret_size == 8) {
+				return emit_simd_ins_for_sig (cfg, klass, OP_XLOWER, 0, arg0_type, fsig, args);
+			}
+#endif
+			return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
+		} else {
+#ifdef TARGET_ARM64
+			if (arg_size == 8) {
+				if (!COMPILE_LLVM (cfg)) {
+					// FIXME: This limitation could be removed once everything here is supported by mini JIT on arm64
+					return NULL;
+				}
+				// Only the Unsafe variants may skip zeroing the upper elements; all others must use the zeroing widen
+				int op = isUnsafe ? OP_XWIDEN_UNSAFE : OP_XWIDEN;
+				return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);
+			}
+#endif
+			MonoInst *ins  = args [0];
+
+			if (!isUnsafe) {
+				static float r4_0 = 0;
+				MonoInst *zero;
+				int zero_dreg = alloc_freg (cfg);
+				MONO_INST_NEW (cfg, zero, OP_R4CONST);
+				zero->inst_p0 = (void*)&r4_0;
+				zero->dreg = zero_dreg;
+				MONO_ADD_INS (cfg->cbb, zero);
+
+				if (arg_size == 8) {
+					ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 2, FALSE);
+				}
+				if (ret_size == 16) {
+					ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 3, FALSE);
+				}
+			}
+			return emit_simd_ins (cfg, klass, OP_XCAST, ins->dreg, -1);
+		}
 	}
 	case SN_Ceiling:
 	case SN_Floor: {
diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h
index 071da07c9e5bf..1156e56a31689 100644
--- a/src/mono/mono/mini/simd-methods.h
+++ b/src/mono/mono/mini/simd-methods.h
@@ -82,10 +82,12 @@ METHOD(AsUInt32)
 METHOD(AsUInt64)
 METHOD(AsVector)
 METHOD(AsVector128)
+METHOD(AsVector128Unsafe)
 METHOD(AsVector2)
 METHOD(AsVector256)
 METHOD(AsVector3)
 METHOD(AsVector4)
+METHOD(AsVector4Unsafe)
 METHOD(BitwiseAnd)
 METHOD(BitwiseOr)
 METHOD(Create)