Skip to content

Commit

Permalink
Add gfx900:xnack-, gfx1032, gfx1034, gfx1035 (#1595)
Browse files Browse the repository at this point in the history
* Allow gfx900:xnack-

The miscommunication about whether gfx900 should be specified with or
without the :xnack- suffix was never properly resolved, and so many of
the libraries that depend on rocBLAS use gfx900:xnack- in their default
configuration.

This change allows both gfx900 and gfx900:xnack- to be accepted by
Tensile, thereby allowing users who are building from source to
choose whichever form they prefer.

* Add gfx1032, gfx1034, gfx1035

This change allows users to build rocBLAS from source for Navi23
(gfx1032), Navi24 (gfx1034) and Rembrandt (gfx1035).
  • Loading branch information
cgmb authored Sep 23, 2022
1 parent 6cfd8a7 commit 41236e3
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 4 deletions.
4 changes: 2 additions & 2 deletions Tensile/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,12 @@

# Translate GPU targets to filter filenames in Tensile_LOGIC directory
architectureMap = {
'all':'_','gfx000':'none', 'gfx803':'r9nano', 'gfx900':'vega10',
'all':'_','gfx000':'none', 'gfx803':'r9nano', 'gfx900':'vega10', 'gfx900:xnack-':'vega10',
'gfx906':'vega20', 'gfx906:xnack+':'vega20', 'gfx906:xnack-':'vega20',
'gfx908':'arcturus','gfx908:xnack+':'arcturus', 'gfx908:xnack-':'arcturus',
'gfx90a':'aldebaran', 'gfx90a:xnack+':'aldebaran', 'gfx90a:xnack-':'aldebaran',
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
'gfx1030':'navi21', 'gfx1031':'navi22',
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33'
}

Expand Down
4 changes: 2 additions & 2 deletions Tensile/Source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ if(NOT DEFINED CXX_VERSION_STRING)
endif()

if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
else()
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
endif()

include(CMakeDependentOption)
Expand Down
9 changes: 9 additions & 0 deletions Tensile/Source/lib/include/Tensile/AMDGPU.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ namespace Tensile
gfx1012 = 1012,
gfx1030 = 1030,
gfx1031 = 1031,
gfx1032 = 1032,
gfx1034 = 1034,
gfx1035 = 1035,
gfx1100 = 1100,
gfx1101 = 1101,
gfx1102 = 1102
Expand Down Expand Up @@ -93,6 +96,12 @@ namespace Tensile
return "gfx1030";
case AMDGPU::Processor::gfx1031:
return "gfx1031";
case AMDGPU::Processor::gfx1032:
return "gfx1032";
case AMDGPU::Processor::gfx1034:
return "gfx1034";
case AMDGPU::Processor::gfx1035:
return "gfx1035";
case AMDGPU::Processor::gfx1100:
return "gfx1100";
case AMDGPU::Processor::gfx1101:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ namespace Tensile
iot::enumCase(io, value, "gfx1012", AMDGPU::Processor::gfx1012);
iot::enumCase(io, value, "gfx1030", AMDGPU::Processor::gfx1030);
iot::enumCase(io, value, "gfx1031", AMDGPU::Processor::gfx1031);
iot::enumCase(io, value, "gfx1032", AMDGPU::Processor::gfx1032);
iot::enumCase(io, value, "gfx1034", AMDGPU::Processor::gfx1034);
iot::enumCase(io, value, "gfx1035", AMDGPU::Processor::gfx1035);
iot::enumCase(io, value, "gfx1100", AMDGPU::Processor::gfx1100);
iot::enumCase(io, value, "gfx1101", AMDGPU::Processor::gfx1101);
iot::enumCase(io, value, "gfx1102", AMDGPU::Processor::gfx1102);
Expand Down
6 changes: 6 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ markers =
xfail-gfx1012: architecture
xfail-gfx1030: architecture
xfail-gfx1031: architecture
xfail-gfx1032: architecture
xfail-gfx1034: architecture
xfail-gfx1035: architecture
xfail-gfx1100: architecture
xfail-gfx1101: architecture
xfail-gfx1102: architecture
Expand All @@ -103,6 +106,9 @@ markers =
skip-gfx1012: architecture
skip-gfx1030: architecture
skip-gfx1031: architecture
skip-gfx1032: architecture
skip-gfx1034: architecture
skip-gfx1035: architecture
skip-gfx1100: architecture
skip-gfx1101: architecture
skip-gfx1102: architecture

0 comments on commit 41236e3

Please sign in to comment.