PaddlePaddle · zhoutianzi666 · May 31, 2024 · May 27, 2024 · May 28, 2024 · May 28, 2024
diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt
@@ -55,6 +55,21 @@ if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
 endif()
 
 if(WITH_CUTLASS)
+  add_custom_target( # runs the gemm_epilogue compile.sh, which builds the CUTLASS GEMM-epilogue library
+    gemm_epilogue_compile_script ALL # ALL: part of the default build target
+    COMMAND bash compile.sh "${PYTHON_EXECUTABLE}" "${CUDA_TOOLKIT_ROOT_DIR}"
+            \"${NVCC_ARCH_BIN}\" "${CMAKE_COMMAND}" # script args $1..$4; NOTE(review): escaped quotes presumably keep the arch value one shell word - confirm
+    WORKING_DIRECTORY # compile.sh creates its build/ directory relative to this source directory
+      ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/gemm_epilogue
+    COMMENT "GemmEpilogue compile script")
+  add_custom_target( # runs the conv2d compile.sh, which builds the CUTLASS conv2d library
+    fused_conv2d_add_act_compile_script ALL # ALL: part of the default build target
+    COMMAND bash compile.sh "${PYTHON_EXECUTABLE}" "${CUDA_TOOLKIT_ROOT_DIR}"
+            \"${NVCC_ARCH_BIN}\" "${CMAKE_COMMAND}" # script args $1..$4; NOTE(review): escaped quotes presumably keep the arch value one shell word - confirm
+    WORKING_DIRECTORY # compile.sh creates its build/ directory relative to this source directory
+      ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/conv2d
+    COMMENT "FusedConv2dAddAct compile script")
+
   execute_process(
     COMMAND
       ${PYTHON_EXECUTABLE}

diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/CMakeLists.txt b/paddle/phi/kernels/fusion/cutlass/conv2d/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.23)
+cmake_minimum_required(VERSION 3.18...3.23) # 3.18 is enough to configure; policies up to 3.23 stay NEW on newer CMake
 
 if(NOT DEFINED PYTHON_EXECUTABLE)
   message(

diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/compile.sh b/paddle/phi/kernels/fusion/cutlass/conv2d/compile.sh
@@ -13,21 +13,38 @@
 # limitations under the License.
 set -e
 
-cutlass_repo_directory="cutlass"
-if [ ! -d "$cutlass_repo_directory" ]; then
-     git clone --branch v3.0.0  https://github.com/NVIDIA/cutlass
-fi
-
 build_directory="build"
 if [ ! -d "$build_directory" ]; then
     mkdir $build_directory
 fi
 
-python_exe_path="python"
-cuda_root_path="/usr/local/cuda"
-gpu_cc="80"
+# Skip everything when the library file already exists (it is not rebuilt here).
+libname="$build_directory/libCutlassConv2d.so"
+if [ -e "$libname" ]; then exit 0; fi
+
+# Fallback values for positional arguments that are missing or empty.
+default_python_exe_path="/usr/bin/python"
+default_cuda_root_path="/usr/local/cuda"
+default_gpu_cc="80"
+default_cmake_command="cmake"
+
+# Positional arguments: $1 python, $2 CUDA root, $3 compute capability, $4 cmake.
+python_exe_path="${1:-$default_python_exe_path}"
+cuda_root_path="${2:-$default_cuda_root_path}"
+gpu_cc="${3:-$default_gpu_cc}"
+cmake_command="${4:-$default_cmake_command}"
+
+# Build only for the listed compute capabilities; any other value is a successful no-op.
+case "$gpu_cc" in
+    75|80|86|89) ;;
+    *) echo "compile.sh: skipping CUTLASS conv2d build, unsupported compute capability '$gpu_cc'"; exit 0 ;;
+esac
+
+# Clone the pinned CUTLASS tag only when no checkout directory is present yet.
+cutlass_repo_directory="cutlass"
+[ -d "$cutlass_repo_directory" ] || git clone --branch v3.0.0 https://github.com/NVIDIA/cutlass
 
 cd $build_directory
-cmake .. -DPYTHON_EXECUTABLE=$python_exe_path -DCUDA_TOOLKIT_ROOT_DIR=$cuda_root_path -DCOMPUTE_CAPABILITY=$gpu_cc
-make -j 
+"$cmake_command" .. -DPYTHON_EXECUTABLE="$python_exe_path" -DCUDA_TOOLKIT_ROOT_DIR="$cuda_root_path" -DCOMPUTE_CAPABILITY="$gpu_cc"  # quoted: values may contain spaces
+"$cmake_command" --build . --parallel 8  # drive whichever generator was configured, with 8 parallel jobs
 cd -
diff --git a/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/CMakeLists.txt b/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.23)
+cmake_minimum_required(VERSION 3.18...3.23) # 3.18 is enough to configure; policies up to 3.23 stay NEW on newer CMake
 
 if(NOT DEFINED PYTHON_EXECUTABLE)
   message(

diff --git a/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/compile.sh b/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/compile.sh
@@ -13,21 +13,38 @@
 # limitations under the License.
 set -e
 
-cutlass_repo_directory="cutlass"
-if [ ! -d "$cutlass_repo_directory" ]; then
-    git clone --branch v2.11.0  https://github.com/NVIDIA/cutlass
-fi
-
 build_directory="build"
 if [ ! -d "$build_directory" ]; then
     mkdir $build_directory
 fi
 
-python_exe_path="/usr/bin/python"
-cuda_root_path="/usr/local/cuda"
-gpu_cc="80"
+# Skip everything when the library file already exists (it is not rebuilt here).
+libname="$build_directory/libCutlassGemmEpilogue.so"
+if [ -e "$libname" ]; then exit 0; fi
+
+# Fallback values for positional arguments that are missing or empty.
+default_python_exe_path="/usr/bin/python"
+default_cuda_root_path="/usr/local/cuda"
+default_gpu_cc="80"
+default_cmake_command="cmake"
+
+# Positional arguments: $1 python, $2 CUDA root, $3 compute capability, $4 cmake.
+python_exe_path="${1:-$default_python_exe_path}"
+cuda_root_path="${2:-$default_cuda_root_path}"
+gpu_cc="${3:-$default_gpu_cc}"
+cmake_command="${4:-$default_cmake_command}"
+
+# Build only for the listed compute capabilities; any other value is a successful no-op.
+case "$gpu_cc" in
+    80|86|89) ;;
+    *) echo "compile.sh: skipping CUTLASS gemm_epilogue build, unsupported compute capability '$gpu_cc'"; exit 0 ;;
+esac
+
+# Clone the pinned CUTLASS tag only when no checkout directory is present yet.
+cutlass_repo_directory="cutlass"
+[ -d "$cutlass_repo_directory" ] || git clone --branch v2.11.0 https://github.com/NVIDIA/cutlass
 
 cd $build_directory
-cmake .. -DPYTHON_EXECUTABLE=$python_exe_path -DCUDA_TOOLKIT_ROOT_DIR=$cuda_root_path -DCOMPUTE_CAPABILITY=$gpu_cc
-make -j 
+"$cmake_command" .. -DPYTHON_EXECUTABLE="$python_exe_path" -DCUDA_TOOLKIT_ROOT_DIR="$cuda_root_path" -DCOMPUTE_CAPABILITY="$gpu_cc"  # quoted: values may contain spaces
+"$cmake_command" --build . --parallel 8  # drive whichever generator was configured, with 8 parallel jobs
 cd -
diff --git a/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/gemm_epilogue_util.h b/paddle/phi/kernels/fusion/cutlass/gemm_epilogue/gemm_epilogue_util.h
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #pragma once
+#include <cuda_bf16.h>  // CUDA bfloat16 (__nv_bfloat16) type definitions
+#include <cuda_fp16.h>  // CUDA half-precision (__half) type definitions
 #include <vector>
 
 #include "paddle/phi/kernels/fusion/cutlass/gemm_epilogue/gemm_epilogue_decl.h"