From e1d8614bd2f5f734f64a28ba5ddf73fdb5df37f8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 4 Jul 2024 10:06:05 -0700 Subject: [PATCH 01/11] Have mono handle the vector as APIs that grow or shrink the vector type --- src/mono/mono/mini/interp/interp-internals.h | 2 + .../mono/mini/interp/interp-simd-intrins.def | 6 ++ src/mono/mono/mini/interp/interp-simd.c | 52 +++++++++++++++++ src/mono/mono/mini/interp/simd-methods.def | 5 +- src/mono/mono/mini/interp/transform-simd.c | 45 ++++++++++++++- src/mono/mono/mini/simd-intrinsics.c | 57 ++++++++++++++++++- src/mono/mono/mini/simd-methods.h | 2 + 7 files changed, 163 insertions(+), 6 deletions(-) diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h index 5fe04bf314c8c..ef230fb341728 100644 --- a/src/mono/mono/mini/interp/interp-internals.h +++ b/src/mono/mono/mini/interp/interp-internals.h @@ -31,6 +31,8 @@ #define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE) #define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT) #define SIZEOF_V128 16 +#define SIZEOF_V2 8 +#define SIZEOF_V3 12 #define INTERP_STACK_SIZE (1024*1024) #define INTERP_REDZONE_SIZE (8*1024) diff --git a/src/mono/mono/mini/interp/interp-simd-intrins.def b/src/mono/mono/mini/interp/interp-simd-intrins.def index 05f32f7e0f2ee..95819af941aee 100644 --- a/src/mono/mono/mini/interp/interp-simd-intrins.def +++ b/src/mono/mono/mini/interp/interp-simd-intrins.def @@ -71,6 +71,12 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_DIVISION, interp_v128_r4_op_division, 231) INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_BITCAST, interp_v128_bitcast, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V2, interp_v128_to_v2, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_TO_V3, interp_v128_to_v3, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V128, interp_v2_to_v128, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V2_TO_V3, interp_v2_to_v3, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V128, interp_v3_to_v128, -1) +INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V3_TO_V2, interp_v3_to_v2, -1) INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation, 97) INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation, 129) diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index 4b3ca913c6e5c..e1c728ba5069d 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -35,6 +35,58 @@ interp_v128_bitcast (gpointer res, gpointer v1) *(v128_i1*)res = *(v128_i1*)v1; } +// Vector2 AsVector2(Vector128 v1) +static void +interp_v128_to_v2 (gpointer res, gpointer v1) +{ + memcpy(res, v1, SIZEOF_V2); +} + +// Vector3 AsVector3(Vector128 v1) +static void +interp_v128_to_v3 (gpointer res, gpointer v1) +{ + memcpy(res, v1, SIZEOF_V3); +} + +// Vector128 AsVector128(Vector2 v1) +static void +interp_v2_to_v128 (gpointer res, gpointer v1) +{ + memcpy(res, v1, SIZEOF_V2); + + guint32 *res_typed = (guint32*)res; + res_typed [2] = 0; + res_typed [3] = 0; +} + +// Vector3 AsVector3(Vector2 v1) +static void +interp_v2_to_v3 (gpointer res, gpointer v1) +{ + memcpy(res, v1, SIZEOF_V2); + + guint32 *res_typed = (guint32*)res; + res_typed [2] = 0; +} + +// Vector128 AsVector128(Vector3 v1) +static void +interp_v3_to_v128 (gpointer res, gpointer v1) +{ + memcpy(res, v1, SIZEOF_V3); + + guint32 *res_typed = (guint32*)res; + res_typed [3] = 0; +} + +// Vector2 AsVector128(Vector3 v1) +static void +interp_v3_to_v2 (gpointer res, gpointer v1) +{ + memcpy(res, v1, SIZEOF_V2); +} + // op_Addition static void interp_v128_i1_op_addition (gpointer res, gpointer v1, gpointer v2) diff --git a/src/mono/mono/mini/interp/simd-methods.def b/src/mono/mono/mini/interp/simd-methods.def index a919c585114ed..38d971a4ec852 100644 --- a/src/mono/mono/mini/interp/simd-methods.def +++ b/src/mono/mono/mini/interp/simd-methods.def @@ -39,8 +39,11 @@ SIMD_METHOD(AsUInt16) SIMD_METHOD(AsUInt32) SIMD_METHOD(AsUInt64) SIMD_METHOD(AsVector) -SIMD_METHOD(AsVector4) SIMD_METHOD(AsVector128) +SIMD_METHOD(AsVector128Unsafe) +SIMD_METHOD(AsVector2) +SIMD_METHOD(AsVector3) +SIMD_METHOD(AsVector4) SIMD_METHOD(ConditionalSelect) SIMD_METHOD(Create) SIMD_METHOD(CreateScalar) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index 2b64baff7f44a..a1a059dcdc254 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -72,8 +72,11 @@ static guint16 sri_vector128_methods [] = { SN_AsUInt32, SN_AsUInt64, SN_AsVector, - SN_AsVector4, SN_AsVector128, + SN_AsVector128Unsafe, + SN_AsVector2, + SN_AsVector3, + SN_AsVector4, SN_ConditionalSelect, SN_Create, SN_CreateScalar, @@ -470,6 +473,9 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature } case SN_AsVector: case SN_AsVector128: + case SN_AsVector128Unsafe: + case SN_AsVector2: + case SN_AsVector3: case SN_AsVector4: { if (!is_element_type_primitive (csignature->ret) || !is_element_type_primitive (csignature->params [0])) return FALSE; @@ -485,7 +491,42 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITCAST; break; } - return FALSE; + + if ((ret_size != 8) && (ret_size != 12) && (ret_size != 16)) { + return FALSE; + } + + if ((arg_size != 8) && (arg_size != 12) && (arg_size != 16)) { + return FALSE; + } + + if (arg_size > ret_size) { + simd_opcode = MINT_SIMD_INTRINS_P_P; + + if (ret_size == 8) { + if (arg_size == 16) { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V2; + } else { + simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V2; + } + } else { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V3; + } + break; + } else { + simd_opcode = MINT_SIMD_INTRINS_P_P; + + if (arg_size == 8) { + if (ret_size == 12) { + simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V3; + } else { + simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V128; + } + } else { + simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V128; + } + break; + } } case SN_ConditionalSelect: simd_opcode = MINT_SIMD_INTRINS_P_PPP; diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index bcd4ae0639e92..56cb518a3ac26 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1196,6 +1196,9 @@ static guint16 sri_vector_methods [] = { SN_AsUInt64, SN_AsVector, SN_AsVector128, + SN_AsVector128Unsafe, + SN_AsVector2, + SN_AsVector3, SN_AsVector4, SN_BitwiseAnd, SN_BitwiseOr, @@ -1640,7 +1643,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } case SN_AsVector: case SN_AsVector128: - case SN_AsVector4: { + case SN_AsVector128Unsafe: + case SN_AsVector2: + case SN_AsVector3: + case SN_AsVector4: + case SN_AsVector4Unsafe: { if (!is_element_type_primitive (fsig->ret) || !is_element_type_primitive (fsig->params [0])) return NULL; @@ -1650,10 +1657,54 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); - if (arg_size == ret_size) + if (arg_size == ret_size) { return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1); + } - return NULL; + if ((ret_size != 8) && (ret_size != 12) && (ret_size != 16)) { + return NULL; + } + + if ((arg_size != 8) && (arg_size != 12) && (arg_size != 16)) { + return NULL; + } + + bool isUnsafe = (id == SN_AsVector128Unsafe) || (id == SN_AsVector4Unsafe); + + if (arg_size > ret_size) { +#ifdef TARGET_ARM64 + if (ret_size == 8) { + return emit_simd_ins_for_sig (cfg, klass, OP_XLOWER, 0, arg0_type, fsig, args); + } +#endif + return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1); + } else { +#ifdef TARGET_ARM64 + if (arg_size == 8) { + int op = isUnsafe ? OP_XWIDEN : OP_XWIDEN_UNSAFE; + return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args); + } +#endif + MonoInst *ins = args [0]; + + if (!isUnsafe) { + static float r4_0 = 0; + MonoInst *zero; + int zero_dreg = alloc_freg (cfg); + MONO_INST_NEW (cfg, zero, OP_R4CONST); + zero->inst_p0 = (void*)&r4_0; + zero->dreg = zero_dreg; + MONO_ADD_INS (cfg->cbb, zero); + + if (arg_size == 8) { + ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 2, FALSE); + } + if (ret_size == 16) { + ins = emit_vector_insert_element (cfg, klass, ins, MONO_TYPE_R4, zero, 3, FALSE); + } + } + return emit_simd_ins (cfg, klass, OP_XCAST, ins->dreg, -1); + } } case SN_Ceiling: case SN_Floor: { diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h index 071da07c9e5bf..1156e56a31689 100644 --- a/src/mono/mono/mini/simd-methods.h +++ b/src/mono/mono/mini/simd-methods.h @@ -82,10 +82,12 @@ METHOD(AsUInt32) METHOD(AsUInt64) METHOD(AsVector) METHOD(AsVector128) +METHOD(AsVector128Unsafe) METHOD(AsVector2) METHOD(AsVector256) METHOD(AsVector3) METHOD(AsVector4) +METHOD(AsVector4Unsafe) METHOD(BitwiseAnd) METHOD(BitwiseOr) METHOD(Create) From a79b77cbcf2a26f6f9d2c123c573083909c1f043 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 4 Jul 2024 16:46:53 -0700 Subject: [PATCH 02/11] Check an interpreter behavior --- src/mono/mono/mini/interp/interp-simd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index e1c728ba5069d..a715c8566b44c 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -40,6 +40,10 @@ static void interp_v128_to_v2 (gpointer res, gpointer v1) { memcpy(res, v1, SIZEOF_V2); + + guint32 *res_typed = (guint32*)res; + res_typed [2] = 0; + res_typed [3] = 0; } // Vector3 AsVector3(Vector128 v1) @@ -47,6 +51,9 @@ static void interp_v128_to_v3 (gpointer res, gpointer v1) { memcpy(res, v1, SIZEOF_V3); + + guint32 *res_typed = (guint32*)res; + res_typed [3] = 0; } // Vector128 AsVector128(Vector2 v1) @@ -68,6 +75,7 @@ interp_v2_to_v3 (gpointer res, gpointer v1) guint32 *res_typed = (guint32*)res; res_typed [2] = 0; + res_typed [3] = 0; } // Vector128 AsVector128(Vector3 v1) @@ -85,6 +93,10 @@ static void interp_v3_to_v2 (gpointer res, gpointer v1) { memcpy(res, v1, SIZEOF_V2); + + guint32 *res_typed = (guint32*)res; + res_typed [2] = 0; + res_typed [3] = 0; } // op_Addition From f5b3067ade0b7677a4ae271ee696c1bc9ef08084 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 5 Jul 2024 18:55:27 -0700 Subject: [PATCH 03/11] Add an assert to see if Mono is reporting the correct size for Vector3 --- src/mono/mono/mini/interp/transform-simd.c | 8 ++++++++ src/mono/mono/mini/mini-llvm.c | 9 +++++++++ src/mono/mono/mini/mini.c | 3 +-- src/mono/mono/mini/simd-intrinsics.c | 14 +++++++++----- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index a1a059dcdc254..873e5e284a311 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -486,6 +486,14 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature MonoClass *arg_class = mono_class_from_mono_type_internal (csignature->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); + if (id == SN_AsVector2) { + g_assert(ret_size == 8); + g_assert((arg_size == 12) || (arg_size == 16)); + } else if (id == SN_AsVector3) { + g_assert(ret_size == 12); + g_assert((arg_size == 8) || (arg_size == 16)); + } + if (arg_size == ret_size) { simd_opcode = MINT_SIMD_INTRINS_P_P; simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITCAST; diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index c6be2c45af7b0..c10aa24e281a4 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -676,6 +676,10 @@ simd_class_to_llvm_type (EmitContext *ctx, MonoClass *klass) } else { guint32 nelems; MonoTypeEnum type = mini_get_simd_type_info (klass, &nelems); + if (nelems == 3) { + // Override to 3 elements + zero + nelems == 4; + } return LLVMVectorType (primitive_type_to_llvm_type (type), nelems); } g_assert_not_reached (); @@ -8276,6 +8280,11 @@ MONO_RESTORE_WARNING case OP_XCONST: { int ecount; MonoTypeEnum etype = mini_get_simd_type_info (ins->klass, (guint32*)&ecount); + + if (ecount == 3) { + // Override to 3 elements + zero + ecount == 4; + } LLVMTypeRef llvm_type = primitive_type_to_llvm_type (etype); LLVMValueRef vals [64]; diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index 395a869ab22be..82c913f285c1d 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -4599,8 +4599,7 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems) *nelems = 2; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector3")) { - // For LLVM SIMD support, Vector3 is treated as a 4-element vector (three elements + zero). - *nelems = 4; + *nelems = 3; return MONO_TYPE_R4; } else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) { MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0]; diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 56cb518a3ac26..30eef690e1aa2 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -653,11 +653,6 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t guint32 nelems; mini_get_simd_type_info (vector_class, &nelems); - // Override nelems for Vector3, with actual number of elements, instead of treating it as a 4-element vector (three elements + zero). - const char *klass_name = m_class_get_name (vector_class); - if (!strcmp (klass_name, "Vector3")) - nelems = 3; - element_size = vector_size / nelems; gboolean has_single_element = vector_size == element_size; @@ -1656,6 +1651,15 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); + + if (id == SN_AsVector2) { + g_assert(ret_size == 8); + g_assert((arg_size == 12) || (arg_size == 16)); + } else if (id == SN_AsVector3) { + g_assert(ret_size == 12); + g_assert((arg_size == 8) || (arg_size == 16)); + } + if (arg_size == ret_size) { return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1); From 75bd2c324b98efb12c8405d387337003df32fe9a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 6 Jul 2024 12:29:59 -0700 Subject: [PATCH 04/11] Ensure that the correct return class/size is used for emit_common_simd_epilogue --- src/mono/mono/mini/interp/transform-simd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index 873e5e284a311..7bc6b441d7417 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -379,7 +379,6 @@ emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMetho g_assert (allow_void); interp_ins_set_dummy_dreg (td->last_ins, td); } else if (ret_mt == MINT_TYPE_VT) { - // For these intrinsics, if we return a VT then it is a V128 push_type_vt (td, vector_klass, vector_size); interp_ins_set_dreg (td->last_ins, td->sp [-1].var); } else { @@ -486,6 +485,9 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature MonoClass *arg_class = mono_class_from_mono_type_internal (csignature->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); + vector_klass = ret_class; + vector_size = ret_size; + if (id == SN_AsVector2) { g_assert(ret_size == 8); g_assert((arg_size == 12) || (arg_size == 16)); @@ -515,9 +517,12 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature if (arg_size == 16) { simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V2; } else { + g_assert (arg_size == 12); simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V2; } } else { + g_assert (arg_size == 16); + g_assert (ret_size == 12); simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V3; } break; @@ -528,9 +533,12 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature if (ret_size == 12) { simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V3; } else { + g_assert (ret_size == 16); simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V128; } } else { + g_assert (arg_size == 12); + g_assert (ret_size == 16); simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V128; } break; From 0489876984f6e76294c016d42e7a14fb911494b3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 6 Jul 2024 17:35:11 -0700 Subject: [PATCH 05/11] Ensure we only copy the amount of memory required --- src/mono/mono/mini/interp/interp-simd.c | 46 +++++++++++++++---------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index a715c8566b44c..1917b2c3130e0 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -39,30 +39,34 @@ interp_v128_bitcast (gpointer res, gpointer v1) static void interp_v128_to_v2 (gpointer res, gpointer v1) { - memcpy(res, v1, SIZEOF_V2); + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; - guint32 *res_typed = (guint32*)res; - res_typed [2] = 0; - res_typed [3] = 0; + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; } // Vector3 AsVector3(Vector128 v1) static void interp_v128_to_v3 (gpointer res, gpointer v1) { - memcpy(res, v1, SIZEOF_V3); + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; - guint32 *res_typed = (guint32*)res; - res_typed [3] = 0; + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; + res_typed [2] = v1_typed [2]; } // Vector128 AsVector128(Vector2 v1) static void interp_v2_to_v128 (gpointer res, gpointer v1) { - memcpy(res, v1, SIZEOF_V2); + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; - guint32 *res_typed = (guint32*)res; + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; res_typed [2] = 0; res_typed [3] = 0; } @@ -71,20 +75,24 @@ interp_v2_to_v128 (gpointer res, gpointer v1) static void interp_v2_to_v3 (gpointer res, gpointer v1) { - memcpy(res, v1, SIZEOF_V2); + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; - guint32 *res_typed = (guint32*)res; + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; res_typed [2] = 0; - res_typed [3] = 0; } // Vector128 AsVector128(Vector3 v1) static void interp_v3_to_v128 (gpointer res, gpointer v1) { - memcpy(res, v1, SIZEOF_V3); + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; - guint32 *res_typed = (guint32*)res; + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; + res_typed [2] = v1_typed [2]; res_typed [3] = 0; } @@ -92,11 +100,11 @@ interp_v3_to_v128 (gpointer res, gpointer v1) static void interp_v3_to_v2 (gpointer res, gpointer v1) { - memcpy(res, v1, SIZEOF_V2); - - guint32 *res_typed = (guint32*)res; - res_typed [2] = 0; - res_typed [3] = 0; + float *res_typed = (float*)res; + float *v1_typed = (float*)v1; + + res_typed [0] = v1_typed [0]; + res_typed [1] = v1_typed [1]; } // op_Addition From 70cb4bdff57b1a840f43a95838a8b2d74abe350f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 6 Jul 2024 21:29:24 -0700 Subject: [PATCH 06/11] See if a more explicit r4_equality implementation fixes the issues --- src/mono/mono/mini/interp/interp-simd.c | 98 +++++++++++++++++++------ 1 file changed, 75 insertions(+), 23 deletions(-) diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index 1917b2c3130e0..a885d258fda05 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -6,6 +6,8 @@ #include #endif +#include + #ifdef INTERP_ENABLE_SIMD gboolean interp_simd_enabled = TRUE; @@ -175,13 +177,18 @@ interp_v128_op_bitwise_or (gpointer res, gpointer v1, gpointer v2) static void interp_v128_op_bitwise_equality (gpointer res, gpointer v1, gpointer v2) { - gint64 *v1_cast = (gint64*)v1; - gint64 *v2_cast = (gint64*)v2; + gint64 *v1_typed = (gint64*)v1; + gint64 *v2_typed = (gint64*)v2; + + bool succeeded = true; - if (*v1_cast == *v2_cast && *(v1_cast + 1) == *(v2_cast + 1)) - *(gint32*)res = 1; - else - *(gint32*)res = 0; + if (v1_typed [0] != v2_typed [0]) { + succeeded = false; + } else if (v1_typed [1] != v2_typed [1]) { + succeeded = false; + } + + *(gint32*)res = succeeded ? 1 : 0; } // op_ExclusiveOr @@ -195,36 +202,81 @@ interp_v128_op_exclusive_or (gpointer res, gpointer v1, gpointer v2) static void interp_v128_op_bitwise_inequality (gpointer res, gpointer v1, gpointer v2) { - gint64 *v1_cast = (gint64*)v1; - gint64 *v2_cast = (gint64*)v2; + gint64 *v1_typed = (gint64*)v1; + gint64 *v2_typed = (gint64*)v2; + + bool succeeded = false; + + if (v1_typed [0] != v2_typed [0]) { + succeeded = true; + } else if (v1_typed [1] != v2_typed [1]) { + succeeded = true; + } + + *(gint32*)res = succeeded ? 1 : 0; +} + +static bool +r4_float_equality(float v1, float v2) +{ + if (v1 == v2) { + return true; + } else if (mono_isnan (v1) && mono_isnan (v2)) { + return true; + } - if (*v1_cast == *v2_cast && *(v1_cast + 1) == *(v2_cast + 1)) - *(gint32*)res = 0; - else - *(gint32*)res = 1; + return false; } -// Vector128EqualsFloatingPoint +// Vector128.EqualsFloatingPoint static void interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2) { - v128_r4 v1_cast = *(v128_r4*)v1; - v128_r4 v2_cast = *(v128_r4*)v2; - v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); - memset (&v1_cast, 0xff, SIZEOF_V128); + float *v1_typed = (float*)v1; + float *v2_typed = (float*)v2; + + bool succeeded = true; + + if (!r4_float_equality(v1_typed [0], v2_typed [0])) { + succeeded = false; + } else if (!r4_float_equality(v1_typed [1], v2_typed [1])) { + succeeded = false; + } else if (!r4_float_equality(v1_typed [2], v2_typed [2])) { + succeeded = false; + } else if (!r4_float_equality(v1_typed [3], v2_typed [3])) { + succeeded = false; + } + + *(gint32*)res = succeeded ? 1 : 0; +} + +static bool +r8_float_equality(double v1, double v2) +{ + if (v1 == v2) { + return true; + } else if (mono_isnan (v1) && mono_isnan (v2)) { + return true; + } - *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0; + return false; } static void interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2) { - v128_r8 v1_cast = *(v128_r8*)v1; - v128_r8 v2_cast = *(v128_r8*)v2; - v128_r8 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); - memset (&v1_cast, 0xff, SIZEOF_V128); + double *v1_typed = (double*)v1; + double *v2_typed = (double*)v2; + + bool succeeded = true; + + if (!r8_float_equality(v1_typed [0], v2_typed [0])) { + succeeded = false; + } else if (!r8_float_equality(v1_typed [1], v2_typed [1])) { + succeeded = false; + } - *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0; + *(gint32*)res = succeeded ? 1 : 0; } // op_Multiply From a8cf3ba85c79a92473526b081bc4e4d0ffe4450c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 7 Jul 2024 09:34:48 -0700 Subject: [PATCH 07/11] Add more explicit validation --- src/mono/mono/mini/interp/transform-simd.c | 24 ++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index 7bc6b441d7417..276cbf1f84cb9 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -482,18 +482,34 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature MonoClass *ret_class = mono_class_from_mono_type_internal (csignature->ret); int ret_size = mono_class_value_size (ret_class, NULL); + if (!strcmp (m_class_get_name (ret_class), "Vector2")) { + g_assert (ret_size == 8); + } else if (!strcmp (m_class_get_name (ret_class), "Vector3")) { + g_assert (ret_size == 12); + } else { + g_assert (ret_size == 16); + } + MonoClass *arg_class = mono_class_from_mono_type_internal (csignature->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); + if (!strcmp (m_class_get_name (arg_class), "Vector2")) { + g_assert (arg_size == 8); + } else if (!strcmp (m_class_get_name (arg_class), "Vector3")) { + g_assert (arg_size == 12); + } else { + g_assert (arg_size == 16); + } + vector_klass = ret_class; vector_size = ret_size; if (id == SN_AsVector2) { - g_assert(ret_size == 8); - g_assert((arg_size == 12) || (arg_size == 16)); + g_assert (ret_size == 8); + g_assert ((arg_size == 12) || (arg_size == 16)); } else if (id == SN_AsVector3) { - g_assert(ret_size == 12); - g_assert((arg_size == 8) || (arg_size == 16)); + g_assert (ret_size == 12); + g_assert ((arg_size == 8) || (arg_size == 16)); } if (arg_size == ret_size) { From 6085a5ddb5e83420b6489dabecbdee62ebb8cedf Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 Jul 2024 11:22:36 -0700 Subject: [PATCH 08/11] Try and explicitly handle instance Equals for Vector2/3 --- .../System/Runtime/Intrinsics/Vector128_1.cs | 13 +- .../mono/mini/interp/interp-simd-intrins.def | 7 +- src/mono/mono/mini/interp/interp-simd.c | 135 +++++++----------- src/mono/mono/mini/interp/simd-methods.def | 1 - src/mono/mono/mini/interp/transform-simd.c | 114 +++++++++------ src/mono/mono/mini/simd-intrinsics.c | 14 +- 6 files changed, 132 insertions(+), 152 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index 23891dcc2b67d..11bc0310203b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -372,16 +372,6 @@ public static Vector128 operator >>>(Vector128 value, int shiftCount) /// The type of the vector () is not supported. public override bool Equals([NotNullWhen(true)] object? obj) => (obj is Vector128 other) && Equals(other); - // Account for floating-point equality around NaN - // This is in a separate method so it can be optimized by the mono interpreter/jiterpreter - [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool EqualsFloatingPoint(Vector128 lhs, Vector128 rhs) - { - Vector128 result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs)); - return result.AsInt32() == Vector128.AllBitsSet; - } - /// Determines whether the specified is equal to the current instance. /// The to compare with the current instance. /// true if is equal to the current instance; otherwise, false. @@ -395,7 +385,8 @@ public bool Equals(Vector128 other) { if ((typeof(T) == typeof(double)) || (typeof(T) == typeof(float))) { - return EqualsFloatingPoint(this, other); + Vector128 result = Vector128.Equals(this, other) | ~(Vector128.Equals(this, this) | Vector128.Equals(other, other)); + return result.AsInt32() == Vector128.AllBitsSet; } else { diff --git a/src/mono/mono/mini/interp/interp-simd-intrins.def b/src/mono/mono/mini/interp/interp-simd-intrins.def index 95819af941aee..c6ba64da4c2c0 100644 --- a/src/mono/mono/mini/interp/interp-simd-intrins.def +++ b/src/mono/mono/mini/interp/interp-simd-intrins.def @@ -58,8 +58,11 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_o INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, -1) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY, interp_v128_r4_float_equality, -1) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY, interp_v128_r8_float_equality, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4, interp_v128_instance_equals_r4, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4, interp_v2_instance_equals_r4, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4, interp_v3_instance_equals_r4, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4, interp_v128_instance_equals_r8, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITIWSE, interp_v128_instance_equals_bitwise, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81) diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index a885d258fda05..d4b44af4d8014 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -41,23 +41,14 @@ interp_v128_bitcast (gpointer res, gpointer v1) static void interp_v128_to_v2 (gpointer res, gpointer v1) { - float *res_typed = (float*)res; - float *v1_typed = (float*)v1; - - res_typed [0] = v1_typed [0]; - res_typed [1] = v1_typed [1]; + memcpy (res, v1, SIZEOF_V2); } // Vector3 AsVector3(Vector128 v1) static void interp_v128_to_v3 (gpointer res, gpointer v1) { - float *res_typed = (float*)res; - float *v1_typed = (float*)v1; - - res_typed [0] = v1_typed [0]; - res_typed [1] = v1_typed [1]; - res_typed [2] = v1_typed [2]; + memcpy (res, v1, SIZEOF_V3); } // Vector128 AsVector128(Vector2 v1) @@ -102,11 +93,7 @@ interp_v3_to_v128 (gpointer res, gpointer v1) static void interp_v3_to_v2 (gpointer res, gpointer v1) { - float *res_typed = (float*)res; - float *v1_typed = (float*)v1; - - res_typed [0] = v1_typed [0]; - res_typed [1] = v1_typed [1]; + memcpy (res, v1, SIZEOF_V2); } // op_Addition @@ -177,18 +164,13 @@ interp_v128_op_bitwise_or (gpointer res, gpointer v1, gpointer v2) static void interp_v128_op_bitwise_equality (gpointer res, gpointer v1, gpointer v2) { - gint64 *v1_typed = (gint64*)v1; - gint64 *v2_typed = (gint64*)v2; - - bool succeeded = true; + gint64 *v1_cast = (gint64*)v1; + gint64 *v2_cast = (gint64*)v2; - if (v1_typed [0] != v2_typed [0]) { - succeeded = false; - } else if (v1_typed [1] != v2_typed [1]) { - succeeded = false; - } - - *(gint32*)res = succeeded ? 1 : 0; + if (*v1_cast == *v2_cast && *(v1_cast + 1) == *(v2_cast + 1)) + *(gint32*)res = 1; + else + *(gint32*)res = 0; } // op_ExclusiveOr @@ -202,81 +184,70 @@ interp_v128_op_exclusive_or (gpointer res, gpointer v1, gpointer v2) static void interp_v128_op_bitwise_inequality (gpointer res, gpointer v1, gpointer v2) { - gint64 *v1_typed = (gint64*)v1; - gint64 *v2_typed = (gint64*)v2; + gint64 *v1_cast = (gint64*)v1; + gint64 *v2_cast = (gint64*)v2; - bool succeeded = false; - - if (v1_typed [0] != v2_typed [0]) { - succeeded = true; - } else if (v1_typed [1] != v2_typed [1]) { - succeeded = true; - } - - *(gint32*)res = succeeded ? 1 : 0; + if (*v1_cast == *v2_cast && *(v1_cast + 1) == *(v2_cast + 1)) + *(gint32*)res = 0; + else + *(gint32*)res = 1; } -static bool -r4_float_equality(float v1, float v2) +// Vector128.Equals +static void +interp_v128_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) { - if (v1 == v2) { - return true; - } else if (mono_isnan (v1) && mono_isnan (v2)) { - return true; - } + v128_r4 v1_cast = *(*(v128_r4**))v1; + v128_r4 v2_cast = *(v128_r4*)v2; + v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); + memset (&v1_cast, 0xff, SIZEOF_V128); - return false; + *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0; } -// Vector128.EqualsFloatingPoint +// Vector2.Equals static void -interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2) +interp_v2_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) { - float *v1_typed = (float*)v1; - float *v2_typed = (float*)v2; - - bool succeeded = true; - - if (!r4_float_equality(v1_typed [0], v2_typed [0])) { - succeeded = false; - } else if (!r4_float_equality(v1_typed [1], v2_typed [1])) { - succeeded = false; - } else if (!r4_float_equality(v1_typed [2], v2_typed [2])) { - succeeded = false; - } else if (!r4_float_equality(v1_typed [3], v2_typed [3])) { - succeeded = false; - } + v128_r4 v1_cast; + interp_v2_to_v128 (&v1_cast, v1); + v128_r4 v2_cast = *(v128_r4*)v2; + v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); + memset (&v1_cast, 0xff, SIZEOF_V2); - *(gint32*)res = succeeded ? 1 : 0; + *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V2) == 0; } -static bool -r8_float_equality(double v1, double v2) +// Vector3.Equals +static void +interp_v3_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) { - if (v1 == v2) { - return true; - } else if (mono_isnan (v1) && mono_isnan (v2)) { - return true; - } + v128_r4 v1_cast; + interp_v3_to_v128 (&v1_cast, v1); + v128_r4 v2_cast = *(v128_r4*)v2; + v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); + memset (&v1_cast, 0xff, SIZEOF_V3); - return false; + *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V3) == 0; } +// Vector128.Equals static void -interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2) +interp_v128_instance_equals_r8 (gpointer res, gpointer v1, gpointer v2) { - double *v1_typed = (double*)v1; - double *v2_typed = (double*)v2; - - bool succeeded = true; + v128_r8 v1_cast = *(*(v128_r8**))v1; + v128_r8 v2_cast = *(v128_r8*)v2; + v128_r8 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); + memset (&v1_cast, 0xff, SIZEOF_V128); - if (!r8_float_equality(v1_typed [0], v2_typed [0])) { - succeeded = false; - } else if (!r8_float_equality(v1_typed [1], v2_typed [1])) { - succeeded = false; - } + *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0; +} - *(gint32*)res = succeeded ? 1 : 0; +// Vector128.Equals, for integer T +static void +interp_v128_instance_equals_bitwise (gpointer res, gpointer v1, gpointer v2) +{ + interp_v128_op_bitwise_equality(res, *(v128_i1**)v1, v2); } // op_Multiply diff --git a/src/mono/mono/mini/interp/simd-methods.def b/src/mono/mono/mini/interp/simd-methods.def index 38d971a4ec852..0f494f4ef6a50 100644 --- a/src/mono/mono/mini/interp/simd-methods.def +++ b/src/mono/mono/mini/interp/simd-methods.def @@ -51,7 +51,6 @@ SIMD_METHOD(CreateScalarUnsafe) SIMD_METHOD(Equals) SIMD_METHOD(EqualsAny) -SIMD_METHOD(EqualsFloatingPoint) SIMD_METHOD(ExtractMostSignificantBits) SIMD_METHOD(GreaterThan) SIMD_METHOD(LessThan) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index 276cbf1f84cb9..3f8a41b6fb6f5 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -98,7 +98,7 @@ static guint16 sri_vector128_methods [] = { }; static guint16 sri_vector128_t_methods [] = { - SN_EqualsFloatingPoint, + SN_Equals, SN_get_AllBitsSet, SN_get_Count, SN_get_One, @@ -120,6 +120,7 @@ static guint16 sri_vector128_t_methods [] = { }; static guint16 sn_vector_t_methods [] = { + SN_Equals, SN_ctor, SN_get_AllBitsSet, SN_get_Count, @@ -220,13 +221,6 @@ emit_common_simd_operations (TransformData *td, int id, int atype, int vector_si *simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY; } break; - case SN_EqualsFloatingPoint: - *simd_opcode = MINT_SIMD_INTRINS_P_PP; - if (atype == MONO_TYPE_R4) - *simd_intrins = INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY; - else if (atype == MONO_TYPE_R8) - *simd_intrins = INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY; - break; case SN_op_ExclusiveOr: *simd_opcode = MINT_SIMD_INTRINS_P_PP; *simd_intrins = INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR; @@ -482,36 +476,12 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature MonoClass *ret_class = mono_class_from_mono_type_internal (csignature->ret); int ret_size = mono_class_value_size (ret_class, NULL); - if (!strcmp (m_class_get_name (ret_class), "Vector2")) { - g_assert (ret_size == 8); - } else if (!strcmp (m_class_get_name (ret_class), "Vector3")) { - g_assert (ret_size == 12); - } else { - g_assert (ret_size == 16); - } - MonoClass *arg_class = mono_class_from_mono_type_internal (csignature->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); - if (!strcmp (m_class_get_name (arg_class), "Vector2")) { - g_assert (arg_size == 8); - } else if (!strcmp (m_class_get_name (arg_class), "Vector3")) { - g_assert (arg_size == 12); - } else { - g_assert (arg_size == 16); - } - vector_klass = ret_class; vector_size = ret_size; - if (id == SN_AsVector2) { - g_assert (ret_size == 8); - g_assert ((arg_size == 12) || (arg_size == 16)); - } else if (id == SN_AsVector3) { - g_assert (ret_size == 12); - g_assert ((arg_size == 8) || (arg_size == 16)); - } - if (arg_size == ret_size) { simd_opcode = MINT_SIMD_INTRINS_P_P; simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITCAST; @@ -532,13 +502,10 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature if (ret_size == 8) { if (arg_size == 16) { simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V2; - } else { - g_assert (arg_size == 12); + } else if (arg_size == 12) { simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V2; } - } else { - g_assert (arg_size == 16); - g_assert (ret_size == 12); + } else if ((ret_size == 12) && (arg_size == 16)) { simd_intrins = INTERP_SIMD_INTRINSIC_V128_TO_V3; } break; @@ -548,13 +515,10 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature if (arg_size == 8) { if (ret_size == 12) { simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V3; - } else { - g_assert (ret_size == 16); + } else if (ret_size == 16) { simd_intrins = INTERP_SIMD_INTRINSIC_V2_TO_V128; } - } else { - g_assert (arg_size == 12); - g_assert (ret_size == 16); + } else if ((arg_size == 12) && (ret_size == 16)) { simd_intrins = INTERP_SIMD_INTRINSIC_V3_TO_V128; } break; @@ -700,8 +664,21 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg)) return FALSE; - if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) + if (id == SN_Equals) { + simd_opcode = MINT_SIMD_INTRINS_P_PP; + + if (atype == MONO_TYPE_R4) { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4; + } + else if (atype == MONO_TYPE_R8) { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8; + } + else { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITWISE; + } + } else if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) { goto opcode_added; + } if (simd_opcode == -1 || simd_intrins == -1) return FALSE; @@ -760,7 +737,7 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c } static gboolean -emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj) +emit_sn_vector_2_3_4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj) { int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod); if (id == -1) @@ -773,14 +750,57 @@ emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *cs MonoClass *vector_klass = cmethod->klass; MonoTypeEnum atype = MONO_TYPE_R4; - int vector_size = SIZEOF_V128; + int vector_size = mono_class_value_size (vector_klass, NULL); int arg_size = sizeof (float); + + if ((vector_size != 8) && (vector_size != 12) && (vector_size != 16)) { + return FALSE; + } + int scalar_arg = -1; for (int i = 0; i < csignature->param_count; i++) { if (csignature->params [i]->type != MONO_TYPE_GENERICINST) scalar_arg = i; } + const char *class_name = m_class_get_name (vector_klass); + bool isQuaternion = !strcmp (class_name, "Quaternion"); + bool isPlane = !strcmp (class_name, "Plane"); + + if (id == SN_ctor) { + if ((vector_size == 8) || (vector_size == 12)) { + // FIXME: We should handle creation of Vector2/Vector3 + return FALSE; + } else if (isQuaternion || isPlane) { + // FIXME: We should handle creation of Quaternion/Plane + return FALSE; + } + } + + if (isQuaternion) { + if ((id == SN_op_Multiply) || (id == SN_op_Division)) { + // FIXME: We should handle multiplication and division of Quaternion + return FALSE; + } + } + + if (id == SN_Equals) { + simd_opcode = MINT_SIMD_INTRINS_P_PP; + + if (vector_size == 8) { + simd_intrins = INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4; + } else if (vector_size == 12) { + simd_intrins = INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4; + } else { + simd_intrins = INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4; + } + } + + if ((vector_size == 8) || (vector_size == 12)) { + // FIXME: We should other APIs for Vector2/Vector3 + return FALSE; + } + if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) { goto opcode_added; } else if (id == SN_ctor) { @@ -1096,8 +1116,8 @@ interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodS } else if (!strcmp (class_ns, "System.Numerics")) { if (!strcmp (class_name, "Vector`1")) return emit_sn_vector_t (td, cmethod, csignature, newobj); - else if (!strcmp (class_name, "Vector4")) - return emit_sn_vector4 (td, cmethod, csignature, newobj); + else if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector3") || !strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane")) + return emit_sn_vector_2_3_4 (td, cmethod, csignature, newobj); } else if (!strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) { if (!strcmp (class_name, "PackedSimd")) return emit_sri_packedsimd (td, cmethod, csignature); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 30eef690e1aa2..254198fd0c868 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1651,15 +1651,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]); int arg_size = mono_class_value_size (arg_class, NULL); - - if (id == SN_AsVector2) { - g_assert(ret_size == 8); - g_assert((arg_size == 12) || (arg_size == 16)); - } else if (id == SN_AsVector3) { - g_assert(ret_size == 12); - g_assert((arg_size == 8) || (arg_size == 16)); - } - if (arg_size == ret_size) { return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1); @@ -1685,6 +1676,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } else { #ifdef TARGET_ARM64 if (arg_size == 8) { + if (!COMPILE_LLVM (cfg)) { + // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 + return NULL; + } + int op = isUnsafe ? OP_XWIDEN : OP_XWIDEN_UNSAFE; return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args); } From d1f1b9ff01cc35620db74e50bd335e704fce56ee Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 Jul 2024 12:23:23 -0700 Subject: [PATCH 09/11] Fix name to be INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8 --- .../runtime/jiterpreter-trace-generator.ts | 27 ------------------- .../mono/mini/interp/interp-simd-intrins.def | 4 +-- src/mono/mono/mini/interp/interp-simd.c | 4 +-- src/mono/mono/mini/mini-llvm.c | 2 +- 4 files changed, 5 insertions(+), 32 deletions(-) diff --git a/src/mono/browser/runtime/jiterpreter-trace-generator.ts b/src/mono/browser/runtime/jiterpreter-trace-generator.ts index e81c9b6683cbc..b4f3fb5fb353f 100644 --- a/src/mono/browser/runtime/jiterpreter-trace-generator.ts +++ b/src/mono/browser/runtime/jiterpreter-trace-generator.ts @@ -3748,33 +3748,6 @@ function emit_simd_3 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrin builder.appendU8(WasmOpcode.i32_eqz); append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store); return true; - case SimdIntrinsic3.V128_R4_FLOAT_EQUALITY: - case SimdIntrinsic3.V128_R8_FLOAT_EQUALITY: { - /* - Vector128 result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs)); - return result.AsInt32() == Vector128.AllBitsSet; - */ - const isR8 = index === SimdIntrinsic3.V128_R8_FLOAT_EQUALITY, - eqOpcode = isR8 ? WasmSimdOpcode.f64x2_eq : WasmSimdOpcode.f32x4_eq; - builder.local("pLocals"); - append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); - builder.local("math_lhs128", WasmOpcode.tee_local); - append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load); - builder.local("math_rhs128", WasmOpcode.tee_local); - builder.appendSimd(eqOpcode); - builder.local("math_lhs128"); - builder.local("math_lhs128"); - builder.appendSimd(eqOpcode); - builder.local("math_rhs128"); - builder.local("math_rhs128"); - builder.appendSimd(eqOpcode); - builder.appendSimd(WasmSimdOpcode.v128_or); - builder.appendSimd(WasmSimdOpcode.v128_not); - builder.appendSimd(WasmSimdOpcode.v128_or); - builder.appendSimd(isR8 ? WasmSimdOpcode.i64x2_all_true : WasmSimdOpcode.i32x4_all_true); - append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store); - return true; - } case SimdIntrinsic3.V128_I1_SHUFFLE: { // Detect a constant indices vector and turn it into a const. This allows // v8 to use a more optimized implementation of the swizzle opcode diff --git a/src/mono/mono/mini/interp/interp-simd-intrins.def b/src/mono/mono/mini/interp/interp-simd-intrins.def index c6ba64da4c2c0..133094a51cbc9 100644 --- a/src/mono/mono/mini/interp/interp-simd-intrins.def +++ b/src/mono/mono/mini/interp/interp-simd-intrins.def @@ -61,8 +61,8 @@ INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, inter INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4, interp_v128_instance_equals_r4, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V2_INSTANCE_EQUALS_R4, interp_v2_instance_equals_r4, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V3_INSTANCE_EQUALS_R4, interp_v3_instance_equals_r4, -1) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R4, interp_v128_instance_equals_r8, -1) -INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITIWSE, interp_v128_instance_equals_bitwise, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_R8, interp_v128_instance_equals_r8, -1) +INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_INSTANCE_EQUALS_BITWISE, interp_v128_instance_equals_bitwise, -1) INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81) diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index d4b44af4d8014..2e1feae88bbe1 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -197,7 +197,7 @@ interp_v128_op_bitwise_inequality (gpointer res, gpointer v1, gpointer v2) static void interp_v128_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) { - v128_r4 v1_cast = *(*(v128_r4**))v1; + v128_r4 v1_cast = **(v128_r4**)v1; v128_r4 v2_cast = *(v128_r4*)v2; v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); memset (&v1_cast, 0xff, SIZEOF_V128); @@ -235,7 +235,7 @@ interp_v3_instance_equals_r4 (gpointer res, gpointer v1, gpointer v2) static void interp_v128_instance_equals_r8 (gpointer res, gpointer v1, gpointer v2) { - v128_r8 v1_cast = *(*(v128_r8**))v1; + v128_r8 v1_cast = **(v128_r8**)v1; v128_r8 v2_cast = *(v128_r8*)v2; v128_r8 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast)); memset (&v1_cast, 0xff, SIZEOF_V128); diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index c10aa24e281a4..98ac68cd7dbb2 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -678,7 +678,7 @@ simd_class_to_llvm_type (EmitContext *ctx, MonoClass *klass) MonoTypeEnum type = mini_get_simd_type_info (klass, &nelems); if (nelems == 3) { // Override to 3 elements + zero - nelems == 4; + nelems = 4; } return LLVMVectorType (primitive_type_to_llvm_type (type), nelems); } From 042fb116aec42d2f8363aea79fb3c5161333ecb5 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 8 Jul 2024 17:08:39 -0700 Subject: [PATCH 10/11] Use =, not == --- src/mono/mono/mini/mini-llvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 98ac68cd7dbb2..57d56da16593f 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -8283,7 +8283,7 @@ MONO_RESTORE_WARNING if (ecount == 3) { // Override to 3 elements + zero - ecount == 4; + ecount = 4; } LLVMTypeRef llvm_type = primitive_type_to_llvm_type (etype); From 9f53b9bd1d7e791454686f885313d01d01702fa6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 12 Jul 2024 11:24:59 -0700 Subject: [PATCH 11/11] Ensure `emit_common_simd_epilogue` handles instance methods --- src/mono/mono/mini/interp/transform-simd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index 3f8a41b6fb6f5..a3bacd757b342 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -364,8 +364,19 @@ is_element_type_primitive (MonoType *vector_type) static void emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMethodSignature *csignature, int vector_size, gboolean allow_void) { - td->sp -= csignature->param_count; - for (int i = 0; i < csignature->param_count; i++) + // The implicit this isn't tracked as part of param_count unless + // explicit_this is also set, but we shouldn't encounter such + // explicit_this cases for the intrinsics + + guint16 param_count = csignature->param_count; + + if (csignature->hasthis) + param_count++; + + g_assert(!csignature->explicit_this); + + td->sp -= param_count; + for (int i = 0; i < param_count; i++) td->last_ins->sregs [i] = td->sp [i].var; int ret_mt = mono_mint_type (csignature->ret);