diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 141501e8a4d9a..10cba6b7eac5c 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -17,6 +17,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" +#include "llvm/ADT/SmallString.h" using namespace clang; using namespace clang::targets; @@ -279,13 +280,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost) return; - StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) - : getArchNameR600(GPUKind); + llvm::SmallString<16> CanonName = + (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) + : getArchNameR600(GPUKind)); + + // Sanitize the name of generic targets. + // e.g. gfx10.1-generic -> gfx10_1_generic + if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST && + GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) { + std::replace(CanonName.begin(), CanonName.end(), '.', '_'); + std::replace(CanonName.begin(), CanonName.end(), '-', '_'); + } + Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); // Emit macros for gfx family e.g. 
gfx906 -> __GFX9__, gfx1030 -> __GFX10___ if (isAMDGCN(getTriple()) && !IsHIPHost) { - assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name"); - Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) + + assert(StringRef(CanonName).starts_with("gfx") && + "Invalid amdgcn canonical name"); + StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind); + Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) + Twine("__")); Builder.defineMacro("__amdgcn_processor__", Twine("\"") + Twine(CanonName) + Twine("\"")); diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 81c22af460d12..3b10444ef71d3 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -131,6 +131,11 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11 + // ARCH-GCN-DAG: #define FP_FAST_FMA 1 // FAST_FMAF-DAG: #define FP_FAST_FMAF 1 diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl index 
eeb16ae98ebad..6f18ea0615cb6 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -115,6 +115,11 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s + // GCNDEFAULT-NOT: -target-cpu // GFX600: "-target-cpu" "gfx600" // GFX601: "-target-cpu" "gfx601" @@ -160,3 +165,8 @@ // GFX1151: "-target-cpu" "gfx1151" // GFX1200: "-target-cpu" "gfx1200" // GFX1201: "-target-cpu" "gfx1201" + +// GFX9_GENERIC: "-target-cpu" "gfx9-generic" +// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic" +// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic" +// GFX11_GENERIC: "-target-cpu" "gfx11-generic" diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 2f10bfb1fd82f..4cc748b218079 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -37,7 +37,7 @@ // RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN // AMDGCN: error: unknown target CPU 'not-a-cpu' -// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, 
gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}} +// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10.1-generic, gfx10.3-generic, gfx11-generic{{$}} // RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM // WEBASM: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 301996847a584..1df2288b3003b 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -520,6 +520,108 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following =========== =============== ============ ===== ================= =============== =============== ====================== +Generic processors allow execution of a single code object on any of the processors that +it supports. Such code objects may not perform as well as those for the non-generic processors. + +Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`). + +Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`) between 1 and 255. +The version of non-generic code objects is always set to 0. 
+ +For a generic code object, adding a new supported processor may require the code generated for the generic target to be changed +so it can continue to execute on the previously supported processors as well as on the new one. +When this happens, the generic code object version number is incremented at the same time as the generic target is updated. + +Each supported processor of a generic target is mapped to the version it was introduced in. +A generic code object can execute on a supported processor if the version of the code object being loaded is +greater than or equal to the version in which the processor was added to the generic target. + + .. table:: AMDGPU Generic Processors + :name: amdgpu-generic-processor-table + + ==================== ============== ================= ================== ================= ================================= + Processor Target Supported Target Features Target Properties Target Restrictions + Triple Processors Supported + Architecture + + ==================== ============== ================= ================== ================= ================================= + ``gfx9-generic`` ``amdgcn`` - ``gfx900`` - xnack - Absolute flat - ``v_mad_mix`` instructions + - ``gfx902`` scratch are not available on + - ``gfx904`` ``gfx900``, ``gfx902``, + - ``gfx906`` ``gfx909``, ``gfx90c`` + - ``gfx909`` - ``v_fma_mix`` instructions + - ``gfx90c`` are not available on ``gfx904`` + - sramecc is not available on + ``gfx906`` + - The following instructions + are not available on ``gfx906``: + + - ``v_fmac_f32`` + - ``v_xnor_b32`` + - ``v_dot4_i32_i8`` + - ``v_dot8_i32_i4`` + - ``v_dot2_i32_i16`` + - ``v_dot2_u32_u16`` + - ``v_dot4_u32_u8`` + - ``v_dot8_u32_u4`` + - ``v_dot2_f32_f16`` + + + ``gfx10.1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are + - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011`` + - ``gfx1012`` - cumode and ``gfx1012`` + - ``gfx1013`` + - ``v_dot4_i32_i8`` + - 
``v_dot8_i32_i4`` + - ``v_dot2_i32_i16`` + - ``v_dot2_u32_u16`` + - ``v_dot2c_f32_f16`` + - ``v_dot4c_i32_i8`` + - ``v_dot4_u32_u8`` + - ``v_dot8_u32_u4`` + - ``v_dot2_f32_f16`` + + - BVH Ray Tracing instructions + are not available on + ``gfx1013`` + + + ``gfx10.3-generic`` ``amdgcn`` - ``gfx1030`` - wavefrontsize64 - Absolute flat No restrictions. + - ``gfx1031`` - cumode scratch + - ``gfx1032`` + - ``gfx1033`` + - ``gfx1034`` + - ``gfx1035`` + - ``gfx1036`` + + + ``gfx11-generic`` ``amdgcn`` - ``gfx1100`` - wavefrontsize64 - Architected Various codegen pessimizations + - ``gfx1101`` - cumode flat scratch are applied to work around some + - ``gfx1102`` - Packed hazards specific to some targets + - ``gfx1103`` work-item within this family. + - ``gfx1150`` IDs + - ``gfx1151`` Not all VGPRs can be used on: + + - ``gfx1100`` + - ``gfx1101`` + - ``gfx1151`` + + SALU floating point instructions + and single-use VGPR hint + instructions are not available + on: + + - ``gfx1150`` + - ``gfx1151`` + + SGPRs are not supported for src1 + in dpp instructions for: + + - ``gfx1150`` + - ``gfx1151`` + ==================== ============== ================= ================== ================= ================================= + + .. _amdgpu-target-features: Target Features @@ -533,7 +635,7 @@ generating the code. A mismatch of features may result in incorrect execution, or a reduction in performance. The target features supported by each processor is listed in -:ref:`amdgpu-processor-table`. +:ref:`amdgpu-processors`. 
Target features are controlled by exactly one of the following Clang options: @@ -1443,6 +1545,7 @@ The AMDGPU backend uses the following ELF header: - ``ELFABIVERSION_AMDGPU_HSA_V3`` - ``ELFABIVERSION_AMDGPU_HSA_V4`` - ``ELFABIVERSION_AMDGPU_HSA_V5`` + - ``ELFABIVERSION_AMDGPU_HSA_V6`` - ``ELFABIVERSION_AMDGPU_PAL`` - ``ELFABIVERSION_AMDGPU_MESA3D`` ``e_type`` - ``ET_REL`` @@ -1451,7 +1554,8 @@ The AMDGPU backend uses the following ELF header: ``e_entry`` 0 ``e_flags`` See :ref:`amdgpu-elf-header-e_flags-v2-table`, :ref:`amdgpu-elf-header-e_flags-table-v3`, - and :ref:`amdgpu-elf-header-e_flags-table-v4-onwards` + :ref:`amdgpu-elf-header-e_flags-table-v4-v5`, + and :ref:`amdgpu-elf-header-e_flags-table-v6-onwards` ========================== =============================== .. @@ -1471,6 +1575,7 @@ The AMDGPU backend uses the following ELF header: ``ELFABIVERSION_AMDGPU_HSA_V3`` 1 ``ELFABIVERSION_AMDGPU_HSA_V4`` 2 ``ELFABIVERSION_AMDGPU_HSA_V5`` 3 + ``ELFABIVERSION_AMDGPU_HSA_V6`` 4 ``ELFABIVERSION_AMDGPU_PAL`` 0 ``ELFABIVERSION_AMDGPU_MESA3D`` 0 =============================== ===== @@ -1517,6 +1622,10 @@ The AMDGPU backend uses the following ELF header: ``-mcode-object-version=5``. This is the default code object version if not specified. + * ``ELFABIVERSION_AMDGPU_HSA_V6`` is used to specify the version of AMD HSA + runtime ABI for code object V6. Specify using the Clang option + ``-mcode-object-version=6``. + * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL runtime ABI. @@ -1543,8 +1652,9 @@ The AMDGPU backend uses the following ELF header: ``NT_AMD_HSA_ISA_VERSION`` note record for code object V2 (see :ref:`amdgpu-note-records-v2`) and in the ``EF_AMDGPU_MACH`` bit field of the ``e_flags`` for code object V3 and above (see - :ref:`amdgpu-elf-header-e_flags-table-v3` and - :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`). 
+ :ref:`amdgpu-elf-header-e_flags-table-v3`, + :ref:`amdgpu-elf-header-e_flags-table-v4-v5` and + :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`). ``e_entry`` The entry point is 0 as the entry points for individual kernels must be @@ -1615,8 +1725,8 @@ The AMDGPU backend uses the following ELF header: :ref:`amdgpu-target-features`. ================================= ===== ============================= - .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and After - :name: amdgpu-elf-header-e_flags-table-v4-onwards + .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and V5 + :name: amdgpu-elf-header-e_flags-table-v4-v5 ============================================ ===== =================================== Name Value Description @@ -1642,80 +1752,120 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled. ============================================ ===== =================================== + .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V6 and After + :name: amdgpu-elf-header-e_flags-table-v6-onwards + + ============================================ ========== ========================================= + Name Value Description + ============================================ ========== ========================================= + ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection + mask for + ``EF_AMDGPU_MACH_xxx`` values + defined in + :ref:`amdgpu-ef-amdgpu-mach-table`. + ``EF_AMDGPU_FEATURE_XNACK_V4`` 0x300 XNACK selection mask for + ``EF_AMDGPU_FEATURE_XNACK_*_V4`` + values. + ``EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4`` 0x000 XNACK unsupported. + ``EF_AMDGPU_FEATURE_XNACK_ANY_V4`` 0x100 XNACK can have any value. + ``EF_AMDGPU_FEATURE_XNACK_OFF_V4`` 0x200 XNACK disabled. + ``EF_AMDGPU_FEATURE_XNACK_ON_V4`` 0x300 XNACK enabled. + ``EF_AMDGPU_FEATURE_SRAMECC_V4`` 0xc00 SRAMECC selection mask for + ``EF_AMDGPU_FEATURE_SRAMECC_*_V4`` + values. 
+ ``EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4`` 0x000 SRAMECC unsupported. + ``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value. + ``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled. + ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled. + ``EF_AMDGPU_GENERIC_VERSION_V`` 0xff000000 Generic code object version selection + mask. This is a value between 1 and 255, + stored in the most significant byte + of EFLAGS. + See :ref:`amdgpu-generic-processor-table`. + ============================================ ========== ========================================= + .. table:: AMDGPU ``EF_AMDGPU_MACH`` Values :name: amdgpu-ef-amdgpu-mach-table - ==================================== ========== ============================= - Name Value Description (see - :ref:`amdgpu-processor-table`) - ==================================== ========== ============================= - ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified* - ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600`` - ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630`` - ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880`` - ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670`` - ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710`` - ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730`` - ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770`` - ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar`` - ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress`` - ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper`` - ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood`` - ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo`` - ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts`` - ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos`` - ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman`` - ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks`` - *reserved* 0x011 - Reserved for ``r600`` - 0x01f architecture processors. 
- ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600`` - ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601`` - ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700`` - ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701`` - ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702`` - ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703`` - ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704`` - *reserved* 0x027 Reserved. - ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801`` - ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802`` - ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803`` - ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810`` - ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900`` - ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902`` - ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904`` - ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906`` - ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908`` - ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909`` - ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033`` - ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602`` - ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705`` - ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034`` - ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` - ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036`` - 
``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102`` - ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200`` - *reserved* 0x049 Reserved. - ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151`` - ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941`` - ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` - *reserved* 0x04d Reserved. - ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` - ==================================== ========== ============================= + ========================================== ========== ============================= + Name Value Description (see + :ref:`amdgpu-processor-table`) + ========================================== ========== ============================= + ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified* + ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600`` + ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630`` + ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880`` + ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670`` + ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710`` + ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730`` + ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770`` + ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar`` + ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress`` + ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper`` + ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood`` + ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo`` + ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts`` + ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos`` + ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman`` + ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks`` + *reserved* 0x011 - Reserved for ``r600`` + 0x01f architecture processors. 
+ ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600`` + ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601`` + ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700`` + ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701`` + ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702`` + ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703`` + ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704`` + *reserved* 0x027 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801`` + ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802`` + ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803`` + ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810`` + ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900`` + ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902`` + ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904`` + ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906`` + ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908`` + ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909`` + ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033`` + ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602`` + ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705`` + ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034`` + ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` + ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036`` + 
``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200`` + *reserved* 0x049 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151`` + ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941`` + ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` + *reserved* 0x04d Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` + *reserved* 0x04f Reserved. + *reserved* 0x050 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10.1-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10.3-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic`` + ========================================== ========== ============================= Sections -------- diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index efd41f9812baa..3eddaee4f7d1a 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -790,11 +790,15 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050, + EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051, + EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052, + EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053, + EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054, // clang-format on // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. 
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 6464285980f00..7da11701fefb8 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -111,6 +111,14 @@ enum GPUKind : uint32_t { GK_AMDGCN_FIRST = GK_GFX600, GK_AMDGCN_LAST = GK_GFX1201, + + GK_GFX9_GENERIC = 192, + GK_GFX10_1_GENERIC = 193, + GK_GFX10_3_GENERIC = 194, + GK_GFX11_GENERIC = 195, + + GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC, + GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC, }; /// Instruction set architecture version. @@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t { FEATURE_WGP = 1 << 9, }; +StringRef getArchFamilyNameAMDGCN(GPUKind AK); + StringRef getArchNameAMDGCN(GPUKind AK); StringRef getArchNameR600(GPUKind AK); StringRef getCanonicalArchName(const Triple &T, StringRef Arch); diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 38a9e0e99b6b7..01949c6dad81f 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { return "gfx1200"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: return "gfx1201"; + + // Generic AMDGCN targets + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: + return "gfx9-generic"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: + return "gfx10.1-generic"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: + return "gfx10.3-generic"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: + return "gfx11-generic"; default: llvm_unreachable("Unknown EF_AMDGPU_MACH value"); } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 1436e920c0112..de1ef2458152c 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -612,6 +612,10 @@ void ScalarBitSetTraits::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, 
EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH); switch (Object->Header.ABIVersion) { default: // ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags. diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 55dbc1a803e13..4ab2b124ef530 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1002,6 +1002,12 @@ def FeatureGWS : SubtargetFeature<"gws", "Has Global Wave Sync" >; +def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6", + "RequiresCOV6", + "true", + "Target Requires Code Object V6" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -1212,6 +1218,17 @@ def FeatureISAVersion9_0_Common : FeatureSet< FeatureImageInsts, FeatureMadMacF32Insts]>; +def FeatureISAVersion9_0_Consumer_Common : FeatureSet< + !listconcat(FeatureISAVersion9_0_Common.Features, + [FeatureImageGather4D16Bug, + FeatureDsSrc2Insts, + FeatureExtendedImageInsts, + FeatureGDS])>; + +def FeatureISAVersion9_Generic : FeatureSet< + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureRequiresCOV6])>; + def FeatureISAVersion9_0_MI_Common : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, [FeatureFmaMixInsts, @@ -1230,43 +1247,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet< FeatureSupportsSRAMECC])>; def FeatureISAVersion9_0_0 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts])>; def 
FeatureISAVersion9_0_2 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts])>; def FeatureISAVersion9_0_4 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureFmaMixInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureFmaMixInsts])>; def FeatureISAVersion9_0_6 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - HalfRate64Ops, + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [HalfRate64Ops, FeatureFmaMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, FeatureDLInsts, FeatureDot1Insts, FeatureDot2Insts, FeatureDot7Insts, FeatureDot10Insts, - FeatureSupportsSRAMECC, - FeatureImageGather4D16Bug])>; + FeatureSupportsSRAMECC])>; def FeatureISAVersion9_0_8 : FeatureSet< !listconcat(FeatureISAVersion9_0_MI_Common.Features, @@ -1279,13 +1280,9 @@ def FeatureISAVersion9_0_8 : FeatureSet< FeatureImageGather4D16Bug])>; def FeatureISAVersion9_0_9 : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageInsts, - FeatureImageGather4D16Bug])>; + !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts, + FeatureImageInsts])>; def FeatureISAVersion9_0_A : FeatureSet< !listconcat(FeatureISAVersion9_0_MI_Common.Features, @@ -1301,12 +1298,8 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureKernargPreload])>; def FeatureISAVersion9_0_C : FeatureSet< - !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureGDS, - FeatureMadMixInsts, - FeatureDsSrc2Insts, - FeatureExtendedImageInsts, - FeatureImageGather4D16Bug])>; + 
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features, + [FeatureMadMixInsts])>; def FeatureISAVersion9_4_Common : FeatureSet< [FeatureGFX9, @@ -1387,6 +1380,10 @@ def FeatureISAVersion10_1_Common : FeatureSet< FeatureFlatSegmentOffsetBug, FeatureNegativeUnalignedScratchOffsetBug])>; +def FeatureISAVersion10_1_Generic : FeatureSet< + !listconcat(FeatureISAVersion10_1_Common.Features, + [FeatureRequiresCOV6])>; + def FeatureISAVersion10_1_0 : FeatureSet< !listconcat(FeatureISAVersion10_1_Common.Features, [])>; @@ -1426,6 +1423,10 @@ def FeatureISAVersion10_3_0 : FeatureSet< FeatureDot10Insts, FeatureShaderCyclesRegister])>; +def FeatureISAVersion10_3_Generic: FeatureSet< + !listconcat(FeatureISAVersion10_3_0.Features, + [FeatureRequiresCOV6])>; + def FeatureISAVersion11_Common : FeatureSet< [FeatureGFX11, FeatureLDSBankCount32, @@ -1447,6 +1448,16 @@ def FeatureISAVersion11_Common : FeatureSet< FeaturePackedTID, FeatureVcmpxPermlaneHazard]>; +// There are a few workarounds that need to be +// added to all targets. This pessimizes codegen +// a bit on the generic GFX11 target. +def FeatureISAVersion11_Generic: FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [FeatureMSAALoadDstSelBug, + FeatureVALUTransUseHazard, + FeatureUserSGPRInit16Bug, + FeatureRequiresCOV6])>; + def FeatureISAVersion11_0_Common : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureMSAALoadDstSelBug, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index db81e1ee9e389..5777a7cabb397 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -156,6 +156,13 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { const GCNSubtarget &STM = MF->getSubtarget(); const Function &F = MF->getFunction(); + // TODO: We're checking this late, would be nice to check it earlier. 
+ if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) { + report_fatal_error( + STM.getCPU() + " is only available on code object version 6 or better", + /*gen_crash_diag*/ false); + } + // TODO: Which one is called first, emitStartOfAsmFile or // emitFunctionBodyStart? if (!getTargetStreamer()->getTargetID()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp index 6f1236fd3b7da..9d44b65d1698c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp @@ -139,10 +139,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) { const GCNSubtarget *ST = static_cast(TM->getSubtargetImpl(F)); - // Check the GPU isn't generic. Generic is used for testing only - // and we don't want this pass to interfere with it. + // Check the GPU isn't generic or generic-hsa. Generic is used for testing + // only and we don't want this pass to interfere with it. StringRef GPUName = ST->getCPU(); - if (GPUName.empty() || GPUName.contains("generic")) + if (GPUName.empty() || GPUName.starts_with("generic")) return false; // Try to fetch the GPU's info. If we can't, it's likely an unknown processor diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 96af1a6aab3da..4671e03d43b3a 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -204,6 +204,11 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, FeatureISAVersion9_4_2.Features >; +// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c] +def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel, + FeatureISAVersion9_Generic.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX10. 
//===----------------------------------------------------------------------===// @@ -252,6 +257,16 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel, FeatureISAVersion10_3_0.Features >; +// [gfx1010, gfx1011, gfx1012, gfx1013] +def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel, + FeatureISAVersion10_1_Generic.Features +>; + +// [gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036] +def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel, + FeatureISAVersion10_3_Generic.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX11. //===----------------------------------------------------------------------===// @@ -280,10 +295,17 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel, FeatureISAVersion11_5_1.Features >; +// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151] +def : ProcessorModel<"gfx11-generic", GFX11SpeedModel, + FeatureISAVersion11_Generic.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX12. //===----------------------------------------------------------------------===// +// TODO: gfx12-generic ? + def : ProcessorModel<"gfx1200", GFX12SpeedModel, FeatureISAVersion12.Features >; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4f8eeaaf500b4..b13b4f7d8f9a2 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -224,6 +224,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasVALUTransUseHazard = false; bool HasForceStoreSC0SC1 = false; + bool RequiresCOV6 = false; + // Dummy feature to use for assembler in tablegen. 
bool FeatureDisable = false; @@ -1165,6 +1167,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; } + bool requiresCodeObjectV6() const { return RequiresCOV6; } + bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; } /// Return if operations acting on VGPR tuples require even alignment. diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 5e9b1674d87dc..a25622c06bfa9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -115,6 +115,10 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; default: AK = GK_NONE; break; } @@ -193,6 +197,10 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; + case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC; + case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC; + case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC; + case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; 
} // clang-format on @@ -659,6 +667,24 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() { unsigned Flags = getEFlagsV4(); unsigned Version = ForceGenericVersion; + if (!Version) { + switch (parseArchAMDGCN(STI.getCPU())) { + case AMDGPU::GK_GFX9_GENERIC: + Version = GenericVersion::GFX9; + break; + case AMDGPU::GK_GFX10_1_GENERIC: + Version = GenericVersion::GFX10_1; + break; + case AMDGPU::GK_GFX10_3_GENERIC: + Version = GenericVersion::GFX10_3; + break; + case AMDGPU::GK_GFX11_GENERIC: + Version = GenericVersion::GFX11; + break; + default: + break; + } + } // Versions start at 1. if (Version) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f24b9f0e3615d..ded252c81af3e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -42,6 +42,17 @@ namespace AMDGPU { struct IsaVersion; +/// Generic target versions emitted by this version of LLVM. +/// +/// These numbers are incremented every time a codegen breaking change occurs +/// within a generic family. +namespace GenericVersion { +static constexpr unsigned GFX9 = 1; +static constexpr unsigned GFX10_1 = 1; +static constexpr unsigned GFX10_3 = 1; +static constexpr unsigned GFX11 = 1; +} // namespace GenericVersion + enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 }; /// \returns True if \p STI is AMDHSA. 
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 20f324604aa52..684d698521e59 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -126,6 +126,11 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, + + {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, + {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, + {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, + {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, // clang-format on }; @@ -144,6 +149,22 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) { } // namespace +StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) { + switch (AK) { + case AMDGPU::GK_GFX9_GENERIC: + return "gfx9"; + case AMDGPU::GK_GFX10_1_GENERIC: + case AMDGPU::GK_GFX10_3_GENERIC: + return "gfx10"; + case AMDGPU::GK_GFX11_GENERIC: + return "gfx11"; + default: { + StringRef ArchName = getArchNameAMDGCN(AK); + return ArchName.empty() ? 
"" : ArchName.drop_back(2); + } + } +} + StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) { if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) return Entry->CanonicalName; @@ -253,6 +274,24 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { case GK_GFX1151: return {11, 5, 1}; case GK_GFX1200: return {12, 0, 0}; case GK_GFX1201: return {12, 0, 1}; + + // Generic targets return the lowest common denominator + // within their family. That is, the ISA that is the most + // restricted in terms of features. + // + // gfx9-generic is tricky because there is no lowest + // common denominator, so we return gfx900 which has mad-mix + // but this family doesn't have it. + // + // This API should never be used to check for a particular + // feature anyway. + // + // TODO: Split up this API depending on its caller so + // generic target handling is more obvious and less risky. + case GK_GFX9_GENERIC: return {9, 0, 0}; + case GK_GFX10_1_GENERIC: return {10, 1, 0}; + case GK_GFX10_3_GENERIC: return {10, 3, 0}; + case GK_GFX11_GENERIC: return {11, 0, 3}; default: return {0, 0, 0}; } // clang-format on @@ -302,6 +341,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX1102: case GK_GFX1101: case GK_GFX1100: + case GK_GFX11_GENERIC: Features["ci-insts"] = true; Features["dot5-insts"] = true; Features["dot7-insts"] = true; @@ -327,6 +367,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX1032: case GK_GFX1031: case GK_GFX1030: + case GK_GFX10_3_GENERIC: Features["ci-insts"] = true; Features["dot1-insts"] = true; Features["dot2-insts"] = true; @@ -357,6 +398,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, [[fallthrough]]; case GK_GFX1013: case GK_GFX1010: + case GK_GFX10_1_GENERIC: Features["dl-insts"] = true; Features["ci-insts"] = true; Features["16-bit-insts"] = true; @@ -424,6 +466,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX904: case 
GK_GFX902: case GK_GFX900: + case GK_GFX9_GENERIC: Features["gfx9-insts"] = true; [[fallthrough]]; case GK_GFX810: @@ -510,6 +553,9 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) { case GK_GFX1011: case GK_GFX1013: case GK_GFX1010: + case GK_GFX11_GENERIC: + case GK_GFX10_3_GENERIC: + case GK_GFX10_1_GENERIC: IsWave32Capable = true; break; default: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index 155f4c92f97d3..9698a3894db68 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -1,11 +1,12 @@ -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc 
-global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs| FileCheck --check-prefixes=ALL,HSA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -200,4 +201,4 @@ attributes #1 = { nounwind } !2 = !{i32 1, i32 1, i32 64} !llvm.module.flags = !{!99} -!99 = !{i32 1, !"amdgpu_code_object_version", i32 400} +!99 = !{i32 1, 
!"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index 357fcf8ef1561..038219fc37404 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -108,6 +108,13 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s + ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" ; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" ; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" @@ -196,6 +203,13 @@ ; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200" ; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201" +; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-" +; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+" +; GFX10_1_GENERIC_NOXNACK: .amdgcn_target 
"amdgcn-amd-amdhsa--gfx10.1-generic:xnack-" +; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack+" +; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic" +; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic" + define amdgpu_kernel void @directive_amdgcn_target() { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll index 380439d8cd9c6..9ba8176947174 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -77,6 +77,11 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s + ; FIXME: With the default attributes the eflags are not accurate for ; xnack and sramecc. Subsequent Target-ID patches will address this. 
@@ -149,6 +154,11 @@ ; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A) ; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48) ; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E) + +; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51) +; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52) +; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53) +; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54) ; ALL: ] define amdgpu_kernel void @elf_header() { diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll index dc6fea4ba1a37..1a9334706cb92 100644 --- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s @gds0 = internal addrspace(2) global [4 x i32] undef, align 4 @lds0 = internal addrspace(3) global [4 x i32] undef, align 128 diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll index 3e4e6938d72eb..8d44330b1b973 100644 --- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll @@ -2,6 +2,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; 
FUNC-LABEL: {{^}}atomic_add_ret_gds: ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll new file mode 100644 index 0000000000000..e3f4b14bac0c1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll @@ -0,0 +1,18 @@ +; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s + +; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s + +; GFX9-V5: gfx9-generic is only available on code object version 6 or better +; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better +; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better +; GFX11-V5: gfx11-generic is only available on code object version 6 or better + +define void @foo() { + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll new file mode 100644 index 0000000000000..4fee563d1cc93 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll @@ -0,0 +1,31 @@ +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+cumode < %s | FileCheck 
-check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck -check-prefix=CU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck -check-prefix=CU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck -check-prefix=CU %s + +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s + +; Checks 10.1, 10.3 and 11 generic targets allow cumode/wave64. 
+ +; NOCU: .amdhsa_workgroup_processor_mode 0 +; NOCU: .workgroup_processor_mode: 0 +; CU: .amdhsa_workgroup_processor_mode 1 +; CU: .workgroup_processor_mode: 1 + +; W64: .amdhsa_wavefront_size32 0 +; W32: .amdhsa_wavefront_size32 1 + +define amdgpu_kernel void @wavefrontsize() { +entry: + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 600} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll index 91284d3838675..cf324d62e1de1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll @@ -1,8 +1,11 @@ ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s ; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll index 
7dc139ef96974..10e1ae3ecfcbd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -1,8 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index e8b9526774d89..b520dd1060ec8 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -2,12 +2,14 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GFX906,SDAG-GFX906 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s @@ -30,6 +32,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -79,6 +90,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src ; GFX906-NEXT: v_fma_mix_f32 
v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -134,6 +154,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> % ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -193,6 +222,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0 +; 
SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v6, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v5 +; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v4, v6 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_v2f32: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -242,6 +284,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v5, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v5 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v4, v6 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_v2f32: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -300,6 +355,19 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, ; GFX906-NEXT: v_mov_b32_e32 v0, v3 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0 +; 
GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_mad_f32 v0, v3, v0, v2 +; GFX9GEN-NEXT: v_mac_f32_e32 v2, v4, v1 +; GFX9GEN-NEXT: v_mov_b32_e32 v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_v2f32_shuffle: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -378,6 +446,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -393,6 +470,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -437,6 +523,15 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, 
half %src1, half %s ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -487,6 +582,15 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9GEN-NEXT: v_mad_f32 v0, -|v0|, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -538,6 +642,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -584,6 +696,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 
op_sel_hi:[1,1,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -631,6 +751,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, |v2| +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -678,6 +806,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -|v2| +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -734,6 +870,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; GFX9GEN: ; %bb.0: +; 
GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 1.0 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -806,6 +950,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 0.15915494 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -885,6 +1037,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -921,6 +1081,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-GFX9GEN: ; %bb.0: +; 
GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x3e230000 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -969,6 +1138,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { ; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1005,6 +1182,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { ; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x367c0000 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1058,6 +1244,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) ; 
SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 1.0 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 1.0 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1112,6 +1309,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 1.0 +; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 1.0 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1168,6 +1376,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000 +; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000 +; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v2, v4 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1224,6 +1444,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000 +; GISEL-GFX9GEN-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v4 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1283,6 +1515,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s ; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 0.15915494 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 0.15915494 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1338,6 +1581,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 0.15915494 +; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 0.15915494 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1386,6 +1640,15 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, 
v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1440,6 +1703,12 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 { ; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: no_mix_simple: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: no_mix_simple: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1474,6 +1743,12 @@ define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 { ; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: no_mix_simple_fabs: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: no_mix_simple_fabs: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1516,6 +1791,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1568,6 +1852,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, fl ; 
GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1628,6 +1920,16 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1691,6 +1993,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half ; GFX906-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1741,6 +2052,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half 
%src0, hal ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1791,6 +2111,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1838,6 +2166,15 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1854,6 +2191,15 @@ define float 
@v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1 ; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1910,6 +2256,15 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half % ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1966,6 +2321,15 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: +; GFX9GEN: ; %bb.0: +; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, 
v1 +; GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2021,6 +2385,15 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2038,6 +2411,16 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2087,6 +2470,15 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; 
GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2104,6 +2496,16 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2153,6 +2555,15 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; +; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; SDAG-GFX9GEN: ; %bb.0: +; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; 
SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: ; SDAG-VI: ; %bb.0: ; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2170,6 +2581,16 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, ; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 ; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; GISEL-GFX9GEN: ; %bb.0: +; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9GEN-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll index 0878fc689cd4b..b08586efe2f21 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll @@ -1,15 +1,13 @@ -; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s -; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GFX9 %s ; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s ; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s -; GFX906-LABEL: image_sample_test: -; GFX906: image_sample_lz - -; GFX908-LABEL: image_sample_test: -; GFX908: image_sample_lz +; GFX9-LABEL: image_sample_test: +; GFX9: image_sample_lz ; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml index 7fb33ca662b19..4c2b4479a4aa3 100644 --- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -238,6 +238,23 @@ # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1201 %s # RUN: obj2yaml %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1201 %s +# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s + +# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s + 
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_3_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_3_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_3_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_3_GENERIC %s + +# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX11_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX11_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX11_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX11_GENERIC %s + + # ELF-R600-ALL: Format: elf32-amdgpu # ELF-R600-ALL: Arch: r600 # ELF-R600-ALL: AddressSize: 32bit @@ -435,6 +452,18 @@ # ELF-AMDGCN-GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E) # YAML-AMDGCN-GFX1201: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1201 ] +# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51) +# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ] + +# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52) +# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ] + +# ELF-AMDGCN-GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53) +# YAML-AMDGCN-GFX10_3_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC ] + +# ELF-AMDGCN-GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54) +# YAML-AMDGCN-GFX11_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC ] + # ELF-AMDGCN-ALL: ] diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll index e296d7fb1fc8f..ca136a6a0d5ba 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -18,6 +18,11 @@ define amdgpu_kernel void @test_kernel() { ; 
----------------------------------GFX11-------------------------------------- ; +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx11-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt @@ -49,6 +54,11 @@ define amdgpu_kernel void @test_kernel() { ; RUN: diff %t-specify.txt %t-detect.txt ; ----------------------------------GFX10-------------------------------------- +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1036 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt @@ -84,6 +94,11 @@ define amdgpu_kernel void @test_kernel() { ; RUN: llvm-objdump -D %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 
-filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt @@ -107,6 +122,11 @@ define amdgpu_kernel void @test_kernel() { ; ----------------------------------GFX9--------------------------------------- ; +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test index e2266d81d1a59..7fbf4aae6c246 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test @@ -253,6 +253,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42 +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC -DFLAG_VALUE=0x52 + # RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t 
-DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42 @@ -322,6 +325,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45 +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC -DFLAG_VALUE=0x53 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3" # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F @@ -355,6 +361,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41 @@ -391,6 +400,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 # RUN: llvm-readobj -h %t | FileCheck %s 
--check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 -DFLAG_VALUE=0x44 +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC -DFLAG_VALUE=0x54 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 -DFLAG_VALUE=0x43 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 82bb12f95d3a3..8e68f08c3fa9a 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1559,68 +1559,72 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = { }; // clang-format off -#define AMDGPU_MACH_ENUM_ENTS \ - ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \ - ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \ - 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \ - 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201") +#define AMDGPU_MACH_ENUM_ENTS \ + ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \ + ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \ + 
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic") // 
clang-format on const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {