diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index 23891dcc2b67d..11bc0310203b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -372,16 +372,6 @@ public static Vector128 operator >>>(Vector128 value, int shiftCount) /// The type of the vector () is not supported. public override bool Equals([NotNullWhen(true)] object? obj) => (obj is Vector128 other) && Equals(other); - // Account for floating-point equality around NaN - // This is in a separate method so it can be optimized by the mono interpreter/jiterpreter - [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool EqualsFloatingPoint(Vector128 lhs, Vector128 rhs) - { - Vector128 result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs)); - return result.AsInt32() == Vector128.AllBitsSet; - } - /// Determines whether the specified is equal to the current instance. /// The to compare with the current instance. /// true if is equal to the current instance; otherwise, false. @@ -395,7 +385,8 @@ public bool Equals(Vector128 other) { if ((typeof(T) == typeof(double)) || (typeof(T) == typeof(float))) { - return EqualsFloatingPoint(this, other); + Vector128 result = Vector128.Equals(this, other) | ~(Vector128.Equals(this, this) | Vector128.Equals(other, other)); + return result.AsInt32() == Vector128.AllBitsSet; } else { diff --git a/src/mono/browser/runtime/jiterpreter-trace-generator.ts b/src/mono/browser/runtime/jiterpreter-trace-generator.ts index e81c9b6683cbc..b4f3fb5fb353f 100644 --- a/src/mono/browser/runtime/jiterpreter-trace-generator.ts +++ b/src/mono/browser/runtime/jiterpreter-trace-generator.ts @@ -3748,33 +3748,6 @@ function emit_simd_3 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrin builder.appendU8(WasmOpcode.i32_eqz); append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store); return true; - case SimdIntrinsic3.V128_R4_FLOAT_EQUALITY: - case SimdIntrinsic3.V128_R8_FLOAT_EQUALITY: { - /* - Vector128 result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs)); - return result.AsInt32() == Vector128.AllBitsSet; - */ - const isR8 = index === SimdIntrinsic3.V128_R8_FLOAT_EQUALITY, - eqOpcode = isR8 ? WasmSimdOpcode.f64x2_eq : WasmSimdOpcode.f32x4_eq; - builder.local("pLocals"); - append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); - builder.local("math_lhs128", WasmOpcode.tee_local); - append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); - builder.local("math_rhs128", WasmOpcode.tee_local); - builder.appendSimd(eqOpcode); - builder.local("math_lhs128"); - builder.local("math_lhs128"); - builder.appendSimd(eqOpcode); - builder.local("math_rhs128"); - builder.local("math_rhs128"); - builder.appendSimd(eqOpcode); - builder.appendSimd(WasmSimdOpcode.v128_or); - builder.appendSimd(WasmSimdOpcode.v128_not); - builder.appendSimd(WasmSimdOpcode.v128_or); - builder.appendSimd(isR8 ? WasmSimdOpcode.i64x2_all_true : WasmSimdOpcode.i32x4_all_true); - append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store); - return true; - } case SimdIntrinsic3.V128_I1_SHUFFLE: { // Detect a constant indices vector and turn it into a const. This allows // v8 to use a more optimized implementation of the swizzle opcode diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h index 5fe04bf314c8c..ef230fb341728 100644 --- a/src/mono/mono/mini/interp/interp-internals.h +++ b/src/mono/mono/mini/interp/interp-internals.h @@ -31,6 +31,8 @@ #define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE) #define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT) #define SIZEOF_V128 16 +#define SIZEOF_V2 8 +#define SIZEOF_V3 12 #define INTERP_STACK_SIZE (1024*1024) #define INTERP_REDZONE_SIZE (8*1024) diff --git a/src/mono/mono/mini/interp/interp-simd-intrins.def b/src/mono/mono/mini/interp/interp-simd-intrins.def index 05f32f7e0f2ee..133094a51cbc9 100644 --- a/src/mono/mono/mini/interp/interp-simd-intrins.def +++ b/src/mono/mono/mini/interp/interp-simd-intrins.def @@ -58,8 +58,11 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_o INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, -1) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY, interp_v128_r4_float_equality, -1) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY, interp_v128_r8_float_equality, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4, interp_v128_instance_equals_r4, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4, interp_v2_instance_equals_r4, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4, interp_v3_instance_equals_r4, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8, interp_v128_instance_equals_r8, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITWISE, interp_v128_instance_equals_bitwise, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81) @@ -71,6 +74,12 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_DIVISION, interp_v128_r4_op_division, 231) INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_BITCAST, interp_v128_bitcast, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V2, interp_v128_to_v2, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V3, interp_v128_to_v3, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V128, interp_v2_to_v128, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V3, interp_v2_to_v3, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V128, interp_v3_to_v128, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V2, interp_v3_to_v2, -1) INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation, 97) INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation, 129) diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index 4b3ca913c6e5c..2e1feae88bbe1 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -6,6 +6,8 @@ #include #endif +#include + #ifdef INTERP_ENABLE_SIMD gboolean interp_simd_enabled = TRUE; @@ -35,6 +37,65 @@ interp_v128_bitcast (gpointer res, gpointer v1) *(v128_i1*)res = *(v128_i1*)v1; } +// Vector2 AsVector2(Vector128 v1) +static void +interp_v128_to_v2 (gpointer res, gpointer v1) +{ + memcpy (res, v1, SIZEOF_V2); +} + +// Vector3 AsVector3(Vector128 v1) +static void +interp_v128_to_v3 (gpointer res, gpointer v1) +{ + memcpy (res, v1, SIZEOF_V3); +} + +// Vector128 AsVector128(Vector2 v1) +static void +interp_v2_to_v128 (gpointer res, gpointer v1) +{ + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; + + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; + res_typed [2] = 0; + res_typed [3] = 0; +} + +// Vector3 AsVector3(Vector2 v1) +static void +interp_v2_to_v3 (gpointer res, gpointer v1) +{ + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; + + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; + res_typed [2] = 0; +} + +// Vector128 AsVector128(Vector3 v1) +static void +interp_v3_to_v128 (gpointer res, gpointer v1) +{ + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; + + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; + res_typed [2] = v1_typed [2]; + res_typed [3] = 0; +} + +// Vector2 AsVector128(Vector3 v1) +static void +interp_v3_to_v2 (gpointer res, gpointer v1) +{ + memcpy (res, v1, SIZEOF_V2); +} + // op_Addition static void interp_v128_i1_op_addition (gpointer res, gpointer v1, gpointer v2) @@ -132,11 +193,11 @@ interp_v128_op_bitwise_inequality (gpointer res, gpointer v1, gpointer v2) *(gint32*)res = 1; } -// Vector128EqualsFloatingPoint +// Vector128.Equals static void -interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2) +interp_v128_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) { - v128_r4 v1_cast = *(v128_r4*)v1; + v128_r4 v1_cast = **(v128_r4**)v1; v128_r4 v2_cast = *(v128_r4*)v2; v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); memset (&v1_cast, 0xff, SIZEOF_V128); @@ -144,10 +205,37 @@ interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2) *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0; } +// Vector2.Equals +static void +interp_v2_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) +{ + v128_r4 v1_cast; + interp_v2_to_v128 (&v1_cast, v1); + v128_r4 v2_cast = *(v128_r4*)v2; + v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); + memset (&v1_cast, 0xff, SIZEOF_V2); + + *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V2) == 0; +} + +// Vector3.Equals static void -interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2) +interp_v3_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) { - v128_r8 v1_cast = *(v128_r8*)v1; + v128_r4 v1_cast; + interp_v3_to_v128 (&v1_cast, v1); + v128_r4 v2_cast = *(v128_r4*)v2; + v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); + memset (&v1_cast, 0xff, SIZEOF_V3); + + *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V3) == 0; +} + +// Vector128.Equals +static void +interp_v128_instance_equals_r8 (gpointer res, gpointer v1, gpointer v2) +{ + v128_r8 v1_cast = **(v128_r8**)v1; v128_r8 v2_cast = *(v128_r8*)v2; v128_r8 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); memset (&v1_cast, 0xff, SIZEOF_V128); @@ -155,6 +243,13 @@ interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2) *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0; } +// Vector128.Equals, for integer T +static void +interp_v128_instance_equals_bitwise (gpointer res, gpointer v1, gpointer v2) +{ + interp_v128_op_bitwise_equality(res, *(v128_i1**)v1, v2); +} + // op_Multiply static void interp_v128_i1_op_multiply (gpointer res, gpointer v1, gpointer v2) diff --git a/src/mono/mono/mini/interp/simd-methods.def b/src/mono/mono/mini/interp/simd-methods.def index a919c585114ed..0f494f4ef6a50 100644 --- a/src/mono/mono/mini/interp/simd-methods.def +++ b/src/mono/mono/mini/interp/simd-methods.def @@ -39,8 +39,11 @@ SIMD_METHOD(AsUInt16) SIMD_METHOD(AsUInt32) SIMD_METHOD(AsUInt64) SIMD_METHOD(AsVector) -SIMD_METHOD(AsVector4) SIMD_METHOD(AsVector128) +SIMD_METHOD(AsVector128Unsafe) +SIMD_METHOD(AsVector2) +SIMD_METHOD(AsVector3) +SIMD_METHOD(AsVector4) SIMD_METHOD(ConditionalSelect) SIMD_METHOD(Create) SIMD_METHOD(CreateScalar) @@ -48,7 +51,6 @@ SIMD_METHOD(CreateScalarUnsafe) SIMD_METHOD(Equals) SIMD_METHOD(EqualsAny) -SIMD_METHOD(EqualsFloatingPoint) SIMD_METHOD(ExtractMostSignificantBits) SIMD_METHOD(GreaterThan) SIMD_METHOD(LessThan) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index 2b64baff7f44a..a3bacd757b342 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -72,8 +72,11 @@ static guint16 sri_vector128_methods [] = { SN_AsUInt32, SN_AsUInt64, SN_AsVector, - SN_AsVector4, SN_AsVector128, + SN_AsVector128Unsafe, + SN_AsVector2, + SN_AsVector3, + SN_AsVector4, SN_ConditionalSelect, SN_Create, SN_CreateScalar, @@ -95,7 +98,7 @@ static guint16 sri_vector128_methods [] = { }; static guint16 sri_vector128_t_methods [] = { - SN_EqualsFloatingPoint, + SN_Equals, SN_get_AllBitsSet, SN_get_Count, SN_get_One, @@ -117,6 +120,7 @@ static guint16 sri_vector128_t_methods [] = { }; static guint16 sn_vector_t_methods [] = { + SN_Equals, SN_ctor, SN_get_AllBitsSet, SN_get_Count, @@ -217,13 +221,6 @@ emit_common_simd_operations (TransformData *td, int id, int atype, int vector_si *simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY; } break; - case SN_EqualsFloatingPoint: - *simd_opcode = MINT_SIMD_INTRINS_P_PP; - if (atype == MONO_TYPE_R4) - *simd_intrins = INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY; - else if (atype == MONO_TYPE_R8) - *simd_intrins = INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY; - break; case SN_op_ExclusiveOr: *simd_opcode = MINT_SIMD_INTRINS_P_PP; *simd_intrins = INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR; @@ -367,8 +364,19 @@ is_element_type_primitive (MonoType *vector_type) static void emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMethodSignature *csignature, int vector_size, gboolean allow_void) { - td->sp -= csignature->param_count; - for (int i = 0; i < csignature->param_count; i++) + // The implicit this isn't tracked as part of param_count unless + // explicit_this is also set, but we shouldn't encounter such + // explicit_this cases for the intrinsics + + guint16 param_count = csignature->param_count; + + if (csignature->hasthis) + param_count++; + + g_assert(!csignature->explicit_this); + + td->sp -= param_count; + for (int i = 0; i < param_count; i++) td->last_ins->sregs [i] = td->sp [i].var; int ret_mt = mono_mint_type (csignature->ret); @@ -376,7 +384,6 @@ emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMetho g_assert (allow_void); interp_ins_set_dummy_dreg (td->last_ins, td); } else if (ret_mt == MINT_TYPE_VT) { - // For these intrinsics, if we return a VT then it is a V128 push_type_vt (td, vector_klass, vector_size); interp_ins_set_dreg (td->last_ins, td->sp [-1].var); } else { @@ -470,6 +477,9 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature } case SN_AsVector: case SN_AsVector128: + case SN_AsVector128Unsafe: + case SN_AsVector2: + case SN_AsVector3: case SN_AsVector4: { if (!is_element_type_primitive (csignature->ret) || !is_element_type_primitive (csignature->params [0])) return FALSE; @@ -480,12 +490,50 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature MonoClass *arg_class = mono_class_from_mono_type_internal (csignature->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); + vector_klass = ret_class; + vector_size = ret_size; + if (arg_size == ret_size) { simd_opcode = MINT_SIMD_INTRINS_P_P; simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITCAST; break; } - return FALSE; + + if ((ret_size != 8) && (ret_size != 12) && (ret_size != 16)) { + return FALSE; + } + + if ((arg_size != 8) && (arg_size != 12) && (arg_size != 16)) { + return FALSE; + } + + if (arg_size > ret_size) { + simd_opcode = MINT_SIMD_INTRINS_P_P; + + if (ret_size == 8) { + if (arg_size == 16) { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V2; + } else if (arg_size == 12) { + simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V2; + } + } else if ((ret_size == 12) && (arg_size == 16)) { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V3; + } + break; + } else { + simd_opcode = MINT_SIMD_INTRINS_P_P; + + if (arg_size == 8) { + if (ret_size == 12) { + simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V3; + } else if (ret_size == 16) { + simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V128; + } + } else if ((arg_size == 12) && (ret_size == 16)) { + simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V128; + } + break; + } } case SN_ConditionalSelect: simd_opcode = MINT_SIMD_INTRINS_P_PPP; @@ -627,8 +675,21 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg)) return FALSE; - if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) + if (id == SN_Equals) { + simd_opcode = MINT_SIMD_INTRINS_P_PP; + + if (atype == MONO_TYPE_R4) { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4; + } + else if (atype == MONO_TYPE_R8) { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8; + } + else { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITWISE; + } + } else if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) { goto opcode_added; + } if (simd_opcode == -1 || simd_intrins == -1) return FALSE; @@ -687,7 +748,7 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c } static gboolean -emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj) +emit_sn_vector_2_3_4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj) { int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod); if (id == -1) @@ -700,14 +761,57 @@ emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *cs MonoClass *vector_klass = cmethod->klass; MonoTypeEnum atype = MONO_TYPE_R4; - int vector_size = SIZEOF_V128; + int vector_size = mono_class_value_size (vector_klass, NULL); int arg_size = sizeof (float); + + if ((vector_size != 8) && (vector_size != 12) && (vector_size != 16)) { + return FALSE; + } + int scalar_arg = -1; for (int i = 0; i < csignature->param_count; i++) { if (csignature->params [i]->type != MONO_TYPE_GENERICINST) scalar_arg = i; } + const char *class_name = m_class_get_name (vector_klass); + bool isQuaternion = !strcmp (class_name, "Quaternion"); + bool isPlane = !strcmp (class_name, "Plane"); + + if (id == SN_ctor) { + if ((vector_size == 8) || (vector_size == 12)) { + // FIXME: We should handle creation of Vector2/Vector3 + return FALSE; + } else if (isQuaternion || isPlane) { + // FIXME: We should handle creation of Quaternion/Plane + return FALSE; + } + } + + if (isQuaternion) { + if ((id == SN_op_Multiply) || (id == SN_op_Division)) { + // FIXME: We should handle multiplication and division of Quaternion + return FALSE; + } + } + + if (id == SN_Equals) { + simd_opcode = MINT_SIMD_INTRINS_P_PP; + + if (vector_size == 8) { + simd_intrins = INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4; + } else if (vector_size == 12) { + simd_intrins = INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4; + } else { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4; + } + } + + if ((vector_size == 8) || (vector_size == 12)) { + // FIXME: We should other APIs for Vector2/Vector3 + return FALSE; + } + if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) { goto opcode_added; } else if (id == SN_ctor) { @@ -1023,8 +1127,8 @@ interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodS } else if (!strcmp (class_ns, "System.Numerics")) { if (!strcmp (class_name, "Vector`1")) return emit_sn_vector_t (td, cmethod, csignature, newobj); - else if (!strcmp (class_name, "Vector4")) - return emit_sn_vector4 (td, cmethod, csignature, newobj); + else if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector3") || !strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane")) + return emit_sn_vector_2_3_4 (td, cmethod, csignature, newobj); } else if (!strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) { if (!strcmp (class_name, "PackedSimd")) return emit_sri_packedsimd (td, cmethod, csignature); diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index c6be2c45af7b0..57d56da16593f 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -676,6 +676,10 @@ simd_class_to_llvm_type (EmitContext *ctx, MonoClass *klass) } else { guint32 nelems; MonoTypeEnum type = mini_get_simd_type_info (klass, &nelems); + if (nelems == 3) { + // Override to 3 elements + zero + nelems = 4; + } return LLVMVectorType (primitive_type_to_llvm_type (type), nelems); } g_assert_not_reached (); @@ -8276,6 +8280,11 @@ MONO_RESTORE_WARNING case OP_XCONST: { int ecount; MonoTypeEnum etype = mini_get_simd_type_info (ins->klass, (guint32*)&ecount); + + if (ecount == 3) { + // Override to 3 elements + zero + ecount = 4; + } LLVMTypeRef llvm_type = primitive_type_to_llvm_type (etype); LLVMValueRef vals [64]; diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index 395a869ab22be..82c913f285c1d 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -4599,8 +4599,7 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) *nelems = 2; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector3")) { - // For LLVM SIMD support, Vector3 is treated as a 4-element vector (three elements + zero). - *nelems = 4; + *nelems = 3; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0]; diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index bcd4ae0639e92..254198fd0c868 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -653,11 +653,6 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t guint32 nelems; mini_get_simd_type_info (vector_class, &nelems); - // Override nelems for Vector3, with actual number of elements, instead of treating it as a 4-element vector (three elements + zero). - const char *klass_name = m_class_get_name (vector_class); - if (!strcmp (klass_name, "Vector3")) - nelems = 3; - element_size = vector_size / nelems; gboolean has_single_element = vector_size == element_size; @@ -1196,6 +1191,9 @@ static guint16 sri_vector_methods [] = { SN_AsUInt64, SN_AsVector, SN_AsVector128, + SN_AsVector128Unsafe, + SN_AsVector2, + SN_AsVector3, SN_AsVector4, SN_BitwiseAnd, SN_BitwiseOr, @@ -1640,7 +1638,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } case SN_AsVector: case SN_AsVector128: - case SN_AsVector4: { + case SN_AsVector128Unsafe: + case SN_AsVector2: + case SN_AsVector3: + case SN_AsVector4: + case SN_AsVector4Unsafe: { if (!is_element_type_primitive (fsig->ret) || !is_element_type_primitive (fsig->params [0])) return NULL; @@ -1650,10 +1652,59 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); - if (arg_size == ret_size) + if (arg_size == ret_size) { return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1); + } - return NULL; + if ((ret_size != 8) && (ret_size != 12) && (ret_size != 16)) { + return NULL; + } + + if ((arg_size != 8) && (arg_size != 12) && (arg_size != 16)) { + return NULL; + } + + bool isUnsafe = (id == SN_AsVector128Unsafe) || (id == SN_AsVector4Unsafe); + + if (arg_size > ret_size) { +#ifdef TARGET_ARM64 + if (ret_size == 8) { + return emit_simd_ins_for_sig (cfg, klass, OP_XLOWER, 0, arg0_type, fsig, args); + } +#endif + return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1); + } else { +#ifdef TARGET_ARM64 + if (arg_size == 8) { + if (!COMPILE_LLVM (cfg)) { + // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 + return NULL; + } + + int op = isUnsafe ? OP_XWIDEN : OP_XWIDEN_UNSAFE; + return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args); + } +#endif + MonoInst *ins = args [0]; + + if (!isUnsafe) { + static float r4_0 = 0; + MonoInst *zero; + int zero_dreg = alloc_freg (cfg); + MONO_INST_NEW (cfg, zero, OP_R4CONST); + zero->inst_p0 = (void*)&r4_0; + zero->dreg = zero_dreg; + MONO_ADD_INS (cfg->cbb, zero); + + if (arg_size == 8) { + ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 2, FALSE); + } + if (ret_size == 16) { + ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 3, FALSE); + } + } + return emit_simd_ins (cfg, klass, OP_XCAST, ins->dreg, -1); + } } case SN_Ceiling: case SN_Floor: { diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h index 071da07c9e5bf..1156e56a31689 100644 --- a/src/mono/mono/mini/simd-methods.h +++ b/src/mono/mono/mini/simd-methods.h @@ -82,10 +82,12 @@ METHOD(AsUInt32) METHOD(AsUInt64) METHOD(AsVector) METHOD(AsVector128) +METHOD(AsVector128Unsafe) METHOD(AsVector2) METHOD(AsVector256) METHOD(AsVector3) METHOD(AsVector4) +METHOD(AsVector4Unsafe) METHOD(BitwiseAnd) METHOD(BitwiseOr) METHOD(Create)