Skip to content

Commit

Permalink
Fix Mono Windows x64 SIMD support. (#46812)
Browse files Browse the repository at this point in the history
  • Loading branch information
lateralusX authored Jan 13, 2021
1 parent 36d132e commit e464963
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 51 deletions.
4 changes: 0 additions & 4 deletions src/mono/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,6 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
set(HOST_NO_SYMLINKS 1)
set(MONO_KEYWORD_THREAD "__declspec (thread)")
set(MONO_ZERO_LEN_ARRAY 1)

# FIXME: disable SIMD support for Windows, see https://github.com/dotnet/runtime/issues/1933
set(DISABLE_SIMD 1)

set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") # statically link VC runtime library
add_compile_options(/W3) # set warning level 3
add_compile_options(/EHsc) # set exception handling behavior
Expand Down
1 change: 0 additions & 1 deletion src/mono/mono/mini/mini.h
Original file line number Diff line number Diff line change
Expand Up @@ -2994,7 +2994,6 @@ MonoCPUFeatures mono_arch_get_cpu_features (void);
#ifdef MONO_ARCH_SIMD_INTRINSICS
void mono_simd_simplify_indirection (MonoCompile *cfg);
void mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins);
void mono_simd_decompose_intrinsics (MonoCompile *cfg);
MonoInst* mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args);
MonoInst* mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr);
void mono_simd_intrinsics_init (void);
Expand Down
152 changes: 124 additions & 28 deletions src/mono/mono/mini/simd-intrinsics-netcore.c
Original file line number Diff line number Diff line change
Expand Up @@ -2018,8 +2018,68 @@ emit_vector256_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fs
return NULL;
}

static
MonoInst*
emit_amd64_intrinsics (const char *class_ns, const char *class_name, MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) {
return emit_x86_intrinsics (cfg, cmethod, fsig, args);
}

if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector128`1"))
return emit_vector128_t (cfg, cmethod, fsig, args);
if (!strcmp (class_name, "Vector128"))
return emit_vector128 (cfg, cmethod, fsig, args);
if (!strcmp (class_name, "Vector256`1"))
return emit_vector256_t (cfg, cmethod, fsig, args);
}

if (!strcmp (class_ns, "System.Numerics")) {
if (!strcmp (class_name, "Vector"))
return emit_sys_numerics_vector (cfg, cmethod, fsig, args);
if (!strcmp (class_name, "Vector`1"))
return emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
}

return NULL;
}
#endif // !TARGET_ARM64

#ifdef TARGET_ARM64
static
MonoInst*
emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
// FIXME: implement Vector64<T>, Vector128<T> and Vector<T> for Arm64
if (!strcmp (class_ns, "System.Runtime.Intrinsics.Arm")) {
return emit_arm64_intrinsics (cfg, cmethod, fsig, args);
}

return NULL;
}
#elif TARGET_AMD64
// TODO: test and enable for x86 too
static
MonoInst*
emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
MonoInst *simd_inst = emit_amd64_intrinsics (class_ns, class_name, cfg, cmethod, fsig, args);
if (simd_inst != NULL) {
cfg->uses_simd_intrinsics |= MONO_CFG_USES_SIMD_INTRINSICS;
cfg->uses_simd_intrinsics |= MONO_CFG_USES_SIMD_INTRINSICS_DECOMPOSE_VTYPE;
}
return simd_inst;
}
#else
static
MonoInst*
emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
return NULL;
}
#endif

MonoInst*
mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
Expand All @@ -2037,45 +2097,81 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
if (m_class_get_nested_in (cmethod->klass))
class_ns = m_class_get_name_space (m_class_get_nested_in (cmethod->klass));

#ifdef TARGET_ARM64
if (!strcmp (class_ns, "System.Runtime.Intrinsics.Arm")) {
MonoInst *ins = emit_arm64_intrinsics (cfg, cmethod, fsig, args);
return ins;
}
#endif // TARGET_ARM64
return emit_simd_intrinsics (class_ns, class_name, cfg, cmethod, fsig, args);
}

#ifdef TARGET_AMD64 // TODO: test and enable for x86 too
if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) {
MonoInst *ins = emit_x86_intrinsics (cfg, cmethod, fsig, args);
return ins;
/*
* Windows x64 value type ABI uses reg/stack references (ArgValuetypeAddrInIReg/ArgValuetypeAddrOnStack)
* for function arguments. When using SIMD intrinsics arguments optimized into OP_ARG needs to be decomposed
* into correspondig SIMD LOADX/STOREX instructions.
*/
#if defined(TARGET_WIN32) && defined(TARGET_AMD64)
static gboolean
decompose_vtype_opt_uses_simd_intrinsics (MonoCompile *cfg, MonoInst *ins)
{
if (cfg->uses_simd_intrinsics & MONO_CFG_USES_SIMD_INTRINSICS_DECOMPOSE_VTYPE)
return TRUE;

switch (ins->opcode) {
case OP_XMOVE:
case OP_XZERO:
case OP_LOADX_MEMBASE:
case OP_LOADX_ALIGNED_MEMBASE:
case OP_STOREX_MEMBASE:
case OP_STOREX_ALIGNED_MEMBASE_REG:
return TRUE;
default:
return FALSE;
}
}

// FIXME: implement Vector64<T>, Vector128<T> and Vector<T> for Arm64

if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector128`1"))
return emit_vector128_t (cfg, cmethod, fsig, args);
if (!strcmp (class_name, "Vector128"))
return emit_vector128 (cfg, cmethod, fsig, args);
if (!strcmp (class_name, "Vector256`1"))
return emit_vector256_t (cfg, cmethod, fsig, args);
static void
decompose_vtype_opt_load_arg (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, gint32 *sreg_int32)
{
guint32 *sreg = (guint32*)sreg_int32;
MonoInst *src_var = get_vreg_to_inst (cfg, *sreg);
if (src_var && src_var->opcode == OP_ARG && src_var->klass && MONO_CLASS_IS_SIMD (cfg, src_var->klass)) {
MonoInst *varload_ins, *load_ins;
NEW_VARLOADA (cfg, varload_ins, src_var, src_var->inst_vtype);
mono_bblock_insert_before_ins (bb, ins, varload_ins);
MONO_INST_NEW (cfg, load_ins, OP_LOADX_MEMBASE);
load_ins->klass = src_var->klass;
load_ins->type = STACK_VTYPE;
load_ins->sreg1 = varload_ins->dreg;
load_ins->dreg = alloc_xreg (cfg);
mono_bblock_insert_after_ins (bb, varload_ins, load_ins);
*sreg = load_ins->dreg;
}
}

if (!strcmp (class_ns, "System.Numerics")) {
if (!strcmp (class_name, "Vector"))
return emit_sys_numerics_vector (cfg, cmethod, fsig, args);
if (!strcmp (class_name, "Vector`1"))
return emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
void
mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
{
if (cfg->opt & MONO_OPT_SIMD && decompose_vtype_opt_uses_simd_intrinsics (cfg, ins)) {
decompose_vtype_opt_load_arg (cfg, bb, ins, &(ins->sreg1));
decompose_vtype_opt_load_arg (cfg, bb, ins, &(ins->sreg2));
decompose_vtype_opt_load_arg (cfg, bb, ins, &(ins->sreg3));
MonoInst *dest_var = get_vreg_to_inst (cfg, ins->dreg);
if (dest_var && dest_var->opcode == OP_ARG && dest_var->klass && MONO_CLASS_IS_SIMD (cfg, dest_var->klass)) {
MonoInst *varload_ins, *store_ins;
ins->dreg = alloc_xreg (cfg);
NEW_VARLOADA (cfg, varload_ins, dest_var, dest_var->inst_vtype);
mono_bblock_insert_after_ins (bb, ins, varload_ins);
MONO_INST_NEW (cfg, store_ins, OP_STOREX_MEMBASE);
store_ins->klass = dest_var->klass;
store_ins->type = STACK_VTYPE;
store_ins->sreg1 = ins->dreg;
store_ins->dreg = varload_ins->dreg;
mono_bblock_insert_after_ins (bb, varload_ins, store_ins);
}
}
#endif // TARGET_AMD64

return NULL;
}

#else
void
mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
{
}
#endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/

void
mono_simd_simplify_indirection (MonoCompile *cfg)
Expand Down
18 changes: 0 additions & 18 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -877,29 +877,11 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i
}
}
}

void
mono_simd_decompose_intrinsics (MonoCompile *cfg)
{
MonoBasicBlock *bb;
MonoInst *ins;

for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
for (ins = bb->code; ins; ins = ins->next) {
mono_simd_decompose_intrinsic (cfg, bb, ins);
}
}
}
#else
void
mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
{
}

void
mono_simd_decompose_intrinsics (MonoCompile *cfg)
{
}
#endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/

/*
Expand Down

0 comments on commit e464963

Please sign in to comment.