From 7c2f219296e612b2c1c88234b3d98483d007db4f Mon Sep 17 00:00:00 2001
From: Vin Huang
Date: Thu, 19 Dec 2024 18:09:33 +0000
Subject: [PATCH 1/2] Support cuSPARSELt v0.6.3 as a backend.

---
 clients/benchmarks/client.cpp                 |  6 ----
 clients/include/spmm/testing_spmm.hpp         |  4 +--
 clients/include/utility.hpp                   |  4 +--
 .../samples/example_spmm_strided_batched.cpp  | 22 ++++++++++----
 library/CMakeLists.txt                        |  5 ++--
 library/include/hipsparselt.h                 |  1 +
 library/src/auxiliary.cpp                     |  5 ++++
 library/src/include/auxiliary.hpp             | 17 ++++++++---
 library/src/nvcc_detail/hipsparselt.cpp       | 29 ++++++++++---------
 9 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/clients/benchmarks/client.cpp b/clients/benchmarks/client.cpp
index 1f63204e..bfee6325 100644
--- a/clients/benchmarks/client.cpp
+++ b/clients/benchmarks/client.cpp
@@ -89,14 +89,8 @@ struct perf_sparse<
     Tc,
     TBias,
     std::enable_if_t<
-#ifdef __HIP_PLATFORM_AMD__
         (std::is_same{} && (std::is_same{} || std::is_same{})
          && std::is_same{})
-#else
-        (std::is_same{}
-         && ((std::is_same{} && std::is_same{})
-             || (std::is_same{} && std::is_same{})))
-#endif
         || (std::is_same{} && (std::is_same{}) && std::is_same{})
         || (std::is_same{} && (std::is_same{}) && std::is_same{})
diff --git a/clients/include/spmm/testing_spmm.hpp b/clients/include/spmm/testing_spmm.hpp
index e89058b9..50ce2e67 100644
--- a/clients/include/spmm/testing_spmm.hpp
+++ b/clients/include/spmm/testing_spmm.hpp
@@ -845,7 +845,7 @@ void testing_spmm(const Arguments& arg)
                     hD_gold_act + stride_d * i,
                     ldd,
                     tSizeD,
-                    arg.alpha_vector_scaling ? hAlpahVector : nullptr,
+                    arg.alpha_vector_scaling ? hAlpahVector : (float*)nullptr,
                     false);
                 auto pos = stride_d * i;
@@ -916,7 +916,7 @@ void testing_spmm(const Arguments& arg)
                     hD_gold + stride_d * i,
                     ldd,
                     tSizeD,
-                    arg.alpha_vector_scaling ? hAlpahVector : nullptr,
+                    arg.alpha_vector_scaling ? hAlpahVector : (float*)nullptr,
                     false);
             }
 #undef activation_param
diff --git a/clients/include/utility.hpp b/clients/include/utility.hpp
index 6c5907b4..25c7d0f7 100644
--- a/clients/include/utility.hpp
+++ b/clients/include/utility.hpp
@@ -545,8 +545,8 @@ inline hipsparseStatus_t expected_hipsparse_status_of_matrix_size(hipDataType
     switch(type)
     {
     case HIP_R_8I:
-    case HIP_R_8F_E4M3_FNUZ:
-    case HIP_R_8F_E5M2_FNUZ:
+    case HIP_R_8F_E4M3:
+    case HIP_R_8F_E5M2:
         if(isSparse)
             row_ = col_ = ld_ = 32;
         else
diff --git a/clients/samples/example_spmm_strided_batched.cpp b/clients/samples/example_spmm_strided_batched.cpp
index 54be9214..1e75c583 100644
--- a/clients/samples/example_spmm_strided_batched.cpp
+++ b/clients/samples/example_spmm_strided_batched.cpp
@@ -113,16 +113,26 @@ inline bool AlmostEqual(__half a, __half b)
         _Float16 data;
     };
-    _HALF a_half = {__half_raw(a).x};
-    _HALF b_half = {__half_raw(b).x};
+    _HALF a_half    = {__half_raw(a).x};
+    _HALF b_half    = {__half_raw(b).x};
+    _HALF zero_half = {__half_raw(static_cast<__half>(0)).x};
+    _HALF one_half  = {__half_raw(static_cast<__half>(1)).x};
+    _HALF e_n2_half = {__half_raw(static_cast<__half>(0.01)).x};
+
     auto a_data = a_half.data;
     auto b_data = b_half.data;
+    auto zero   = zero_half.data;
+    auto one    = one_half.data;
+    auto e_n2   = e_n2_half.data;
 #else
     auto a_data = a;
     auto b_data = b;
+    auto zero   = __half(0);
+    auto one    = __half(1);
+    auto e_n2   = __half(0.01);
 #endif
-    auto absA = (a_data > 0.0) ? a_data : static_cast(-a_data);
-    auto absB = (b_data > 0.0) ? b_data : static_cast(-b_data);
+    auto absA = (a_data > zero) ? a_data : static_cast(-a_data);
+    auto absB = (b_data > zero) ? b_data : static_cast(-b_data);
     // this avoids NaN when inf is compared against inf in the alternative code
     // path
     if(static_cast<float>(absA) == std::numeric_limits<float>::infinity()
@@ -135,8 +145,8 @@ inline bool AlmostEqual(__half a, __half b)
     {
         return a_data == b_data;
     }
-    auto absDiff = (a_data - b_data > 0) ? a_data - b_data : b_data - a_data;
-    return absDiff / (absA + absB + 1) < 0.01;
+    auto absDiff = (a_data - b_data > zero) ? a_data - b_data : b_data - a_data;
+    return absDiff / (absA + absB + one) < e_n2;
 }

 inline void extract_metadata(unsigned metadata, int& a, int& b, int& c, int& d)
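Background for the AlmostEqual() change above: with the cuSPARSELt backend,
__half arithmetic goes through CUDA's __half operators, which do not reliably
mix with bare double literals, so the 0.0 / 1 / 0.01 constants are now
materialized as half-typed values (zero, one, e_n2) on both platform paths.
The predicate itself is unchanged: |a - b| / (|a| + |b| + 1) < 0.01, with an
early exit so that inf compared against inf does not produce NaN. A
float-only sketch of the same test, for illustration only (not part of the
patch):

    #include <cmath>
    #include <limits>

    // Symmetric relative-error comparison used by the sample; the +1 in the
    // denominator keeps values near zero from failing on rounding noise.
    inline bool almost_equal_f32(float a, float b)
    {
        float absA = std::fabs(a);
        float absB = std::fabs(b);
        // inf / inf would yield NaN below, so fall back to exact comparison.
        if(absA == std::numeric_limits<float>::infinity()
           || absB == std::numeric_limits<float>::infinity())
            return a == b;
        return std::fabs(a - b) / (absA + absB + 1.0f) < 0.01f;
    }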
diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt
index aac85065..84db9e04 100644
--- a/library/CMakeLists.txt
+++ b/library/CMakeLists.txt
@@ -1,5 +1,5 @@
 # ########################################################################
-# Copyright (c) 2022 Advanced Micro Devices, Inc.
+# Copyright (c) 2022-2024 Advanced Micro Devices, Inc.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -142,7 +142,8 @@ if(NOT BUILD_CUDA)
   # Target link libraries
   target_link_libraries(hipsparselt PRIVATE hip::device ${DL_LIB})
 else()
-  target_link_libraries(hipsparselt PRIVATE /usr/lib/x86_64-linux-gnu/libcusparseLt.so ${CUDA_CUSPARSE_LIBRARY})
+  find_library(CUDA_CUSPARSELT_LIBRARY NAMES cusparseLt PATHS /usr/lib/x86_64-linux-gnu /usr/local/cuda/lib64 REQUIRED)
+  target_link_libraries(hipsparselt PRIVATE ${CUDA_CUSPARSELT_LIBRARY} ${CUDA_CUSPARSE_LIBRARY})
 endif()

 # Target properties
diff --git a/library/include/hipsparselt.h b/library/include/hipsparselt.h
index 6becfb93..8b94b54a 100644
--- a/library/include/hipsparselt.h
+++ b/library/include/hipsparselt.h
@@ -236,6 +236,7 @@ typedef enum {
                                      When Input's datatype is FP16 - Bias type can be FP16 or FP32. (default FP16)
                                      When Input's datatype is BF16 - Bias type can be BF16 or FP32. (default BF16)
                                      In other cases - Bias type is FP32.*/
+    HIPSPARSELT_MATMUL_SPARSE_MAT_POINTER = 17, /**< Pointer to the pruned sparse matrix. */
 } hipsparseLtMatmulDescAttribute_t;
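The new descriptor attribute above is set through the existing
hipsparseLtMatmulDescSetAttribute() entry point. A minimal, hypothetical
sketch follows (not part of the patch): "handle" and "matmul_descr" are
assumed to be initialized elsewhere, and the installed header may live at a
different path than the in-tree one shown.

    #include <hipsparselt.h>

    // Forward the device pointer of the pruned (structured) A matrix to an
    // existing matmul descriptor via the new attribute. The attribute data
    // is the pointer value itself, so we pass the address of the pointer.
    hipsparseStatus_t set_pruned_pointer(const hipsparseLtHandle_t*     handle,
                                         hipsparseLtMatmulDescriptor_t* matmul_descr,
                                         const void*                    d_pruned_A)
    {
        return hipsparseLtMatmulDescSetAttribute(handle,
                                                 matmul_descr,
                                                 HIPSPARSELT_MATMUL_SPARSE_MAT_POINTER,
                                                 &d_pruned_A,
                                                 sizeof(d_pruned_A));
    }

On the CUDA backend this maps one-to-one onto
CUSPARSELT_MATMUL_SPARSE_MAT_POINTER, as wired up in
library/src/nvcc_detail/hipsparselt.cpp below.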
diff --git a/library/src/auxiliary.cpp b/library/src/auxiliary.cpp
index 4a9f9ea9..27a18a1c 100644
--- a/library/src/auxiliary.cpp
+++ b/library/src/auxiliary.cpp
@@ -44,8 +44,13 @@ const hipDataType string_to_hip_datatype(const std::string& value)
         value == "f16_r" || value == "h" ? HIP_R_16F :
         value == "bf16_r"                ? HIP_R_16BF :
         value == "i8_r"                  ? HIP_R_8I :
+#ifdef __HIP_PLATFORM_AMD__
         value == "f8_r"                  ? HIP_R_8F_E4M3_FNUZ :
         value == "bf8_r"                 ? HIP_R_8F_E5M2_FNUZ :
+#else
+        value == "f8_r"                  ? HIP_R_8F_E4M3 :
+        value == "bf8_r"                 ? HIP_R_8F_E5M2 :
+#endif
         static_cast<hipDataType>(-1);
 }
diff --git a/library/src/include/auxiliary.hpp b/library/src/include/auxiliary.hpp
index 698e526d..422d2ad1 100644
--- a/library/src/include/auxiliary.hpp
+++ b/library/src/include/auxiliary.hpp
@@ -144,9 +144,15 @@ constexpr const char* hip_datatype_to_string(hipDataType type)
         return "bf16_r";
     case HIP_R_8I:
         return "i8_r";
+#ifdef __HIP_PLATFORM_AMD__
     case HIP_R_8F_E4M3_FNUZ:
+#endif
+    case HIP_R_8F_E4M3:
         return "f8_r";
+#ifdef __HIP_PLATFORM_AMD__
     case HIP_R_8F_E5M2_FNUZ:
+#endif
+    case HIP_R_8F_E5M2:
         return "bf8_r";
     default:
         return "invalid";
@@ -165,10 +171,6 @@ constexpr const char* hipsparselt_computetype_to_string(hipsparseLtComputetype_t
         return "i32_r";
     case HIPSPARSELT_COMPUTE_32F:
         return "f32_r";
-    case HIPSPARSELT_COMPUTE_TF32:
-        return "tf32_r";
-    case HIPSPARSELT_COMPUTE_TF32_FAST:
-        return "tf32f_r";
     }
     return "invalid";
 }
@@ -223,6 +225,13 @@ __host__ __device__ inline bool hipsparselt_isnan(__half arg)
     return (~x.x & 0x7c00) == 0 && (x.x & 0x3ff) != 0;
 }

+#ifdef __HIP_PLATFORM_NVIDIA__
+__host__ __device__ inline bool hipsparselt_isnan(__nv_bfloat16 arg)
+{
+    return __hisnan(arg);
+}
+#endif
+
 /*******************************************************************************
  * \brief returns true if arg is Infinity
  ********************************************************************************/
diff --git a/library/src/nvcc_detail/hipsparselt.cpp b/library/src/nvcc_detail/hipsparselt.cpp
index b158c8b3..8d48921e 100644
--- a/library/src/nvcc_detail/hipsparselt.cpp
+++ b/library/src/nvcc_detail/hipsparselt.cpp
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include

 #define TO_STR2(x) #x
 #define TO_STR(x) TO_STR2(x)
@@ -114,7 +115,7 @@ hipsparseStatus_t hipCUSPARSEStatusToHIPStatus(cusparseStatus_t cuStatus)
 }

 /* @deprecated */
-cudaDataType HIPDatatypeToCuSparseLtDatatype(hipsparseLtDatatype_t type)
+cudaDataType HIPSparseLtDatatypeToCuSparseLtDatatype(hipsparseLtDatatype_t type)
 {
     switch(type)
     {
@@ -185,10 +186,8 @@ cusparseComputeType HIPComputetypeToCuSparseComputetype(hipsparseLtComputetype_t
         return CUSPARSE_COMPUTE_16F;
     case HIPSPARSELT_COMPUTE_32I:
         return CUSPARSE_COMPUTE_32I;
-    case HIPSPARSELT_COMPUTE_TF32:
-        return CUSPARSE_COMPUTE_TF32;
-    case HIPSPARSELT_COMPUTE_TF32_FAST:
-        return CUSPARSE_COMPUTE_TF32_FAST;
+    case HIPSPARSELT_COMPUTE_32F:
+        return CUSPARSE_COMPUTE_32F;
     default:
         throw HIPSPARSE_STATUS_NOT_SUPPORTED;
     }
@@ -202,10 +201,8 @@ hipsparseLtComputetype_t CuSparseLtComputetypeToHIPComputetype(cusparseComputeTy
     switch(computeType)
     {
     case CUSPARSE_COMPUTE_16F:
         return HIPSPARSELT_COMPUTE_16F;
     case CUSPARSE_COMPUTE_32I:
         return HIPSPARSELT_COMPUTE_32I;
-    case CUSPARSE_COMPUTE_TF32:
-        return HIPSPARSELT_COMPUTE_TF32;
-    case CUSPARSE_COMPUTE_TF32_FAST:
-        return HIPSPARSELT_COMPUTE_TF32_FAST;
+    case CUSPARSE_COMPUTE_32F:
+        return HIPSPARSELT_COMPUTE_32F;
     default:
         throw HIPSPARSE_STATUS_NOT_SUPPORTED;
     }
@@ -312,6 +309,8 @@ cusparseLtMatmulDescAttribute_t
         return CUSPARSELT_MATMUL_BIAS_STRIDE;
     case HIPSPARSELT_MATMUL_BIAS_POINTER:
         return CUSPARSELT_MATMUL_BIAS_POINTER;
+    case HIPSPARSELT_MATMUL_SPARSE_MAT_POINTER:
+        return CUSPARSELT_MATMUL_SPARSE_MAT_POINTER;
     default:
         throw HIPSPARSE_STATUS_NOT_SUPPORTED;
     }
@@ -340,6 +339,8 @@ hipsparseLtMatmulDescAttribute_t
         return HIPSPARSELT_MATMUL_BIAS_STRIDE;
     case CUSPARSELT_MATMUL_BIAS_POINTER:
         return HIPSPARSELT_MATMUL_BIAS_POINTER;
+    case CUSPARSELT_MATMUL_SPARSE_MAT_POINTER:
+        return HIPSPARSELT_MATMUL_SPARSE_MAT_POINTER;
     default:
         throw HIPSPARSE_STATUS_NOT_SUPPORTED;
     }
@@ -531,7 +532,9 @@ hipsparseStatus_t hipsparseLtInit(hipsparseLtHandle_t* handle)
     if((log_env = getenv("HIPSPARSELT_LOG_MASK")) != NULL)
     {
         int mask = strtol(log_env, nullptr, 0);
-        setenv("CUSPARSELT_LOG_MASK", std::to_string(mask).c_str(), 0);
+        char mask_str[12];
+        snprintf(mask_str, sizeof(mask_str), "%d", mask);
+        setenv("CUSPARSELT_LOG_MASK", mask_str, 0);
     }
     if((log_env = getenv("HIPSPARSELT_LOG_FILE")) != NULL)
     {
@@ -967,10 +970,8 @@ catch(...)
 hipsparseStatus_t hipsparseLtGetArchName(char** archName)
 try
 {
-    *archName        = nullptr;
-    std::string arch = "cuda";
-    *archName        = (char*)malloc(arch.size() * sizeof(char));
-    strncpy(*archName, arch.c_str(), arch.size());
+    *archName = (char*)malloc(5);
+    snprintf(*archName, 5, "cuda");
     return HIPSPARSE_STATUS_SUCCESS;
 }
 catch(...)

From 1da8fc07b74b6ff66345a650f75e5d61b2c92718 Mon Sep 17 00:00:00 2001
From: Vin Huang
Date: Tue, 24 Dec 2024 04:03:21 +0000
Subject: [PATCH 2/2] Update documentation for the cuSPARSELt v0.6.3 backend

---
 README.md                              |  4 +--
 docs/reference/data-type-support.rst   | 45 +++++++++++++++-----------
 docs/reference/supported-functions.rst |  2 +-
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index be1fcee2..01a59855 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 It sits between the application and a 'worker' SPARSE library, marshalling inputs into
 the backend library and marshalling results back to the application. hipSPARSELt exports
 an interface that does not require the client to change, regardless of the chosen backend. Currently, hipSPARSELt supports
-[rocSPARSELt](library/src/hcc_detial/rocsparselt) and [NVIDIA CUDA cuSPARSELt v0.4](https://docs.nvidia.com/cuda/cusparselt)
+[rocSPARSELt](library/src/hcc_detial/rocsparselt) and [NVIDIA CUDA cuSPARSELt v0.6.3](https://docs.nvidia.com/cuda/cusparselt)
 as backends.

 > [!NOTE]
@@ -87,7 +87,7 @@ so it may prompt you for a password.

 * Add kernel selection and generator, used to provide the appropriate solution for the specific problem.
 * CUDA
-  * Support cusparseLt v0.4
+  * Support cusparseLt v0.6.3

 ## Documentation
diff --git a/docs/reference/data-type-support.rst b/docs/reference/data-type-support.rst
index f1a51311..ec614823 100644
--- a/docs/reference/data-type-support.rst
+++ b/docs/reference/data-type-support.rst
@@ -26,14 +26,14 @@ Data type support
       - ✅
     * - float8
-      - HIP_R_8F_E4M3_FNUZ
-      - ❌
+      - HIP_R_8F_E4M3
       - ❌
+      - ✅
     * - bfloat8
-      - HIP_R_8F_E5M2_FNUZ
-      - ❌
+      - HIP_R_8F_E5M2
       - ❌
+      - ✅
     * - int16
       - Not Supported
@@ -51,9 +51,9 @@ Data type support
       - ✅
     * - int32
-      - Not Supported
-      - ❌
+      - HIP_R_32I
       - ❌
+      - ✅
     * - tensorfloat32
       - Not Supported
@@ -120,12 +120,12 @@ Data type support
     * - tensorfloat32
       - Not Supported
       - ❌
-      - ✅
+      - ❌
     * - float32
       - HIPSPARSELT_COMPUTE_32F
       - ✅
-      - ❌
+      - ✅
     * - float64
       - Not Supported
@@ -135,14 +135,23 @@ Data type support

 * List of supported compute types at specific input and output types:

 .. csv-table::
-   :header: "Input", "Output", "Compute type", "Backend"
-   "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F", "HIP"
-   "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "HIP"
-   "HIP_R_8I", "HIP_R_8I", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA"
-   "HIP_R_8I", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA"
-   "HIP_R_8I", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA"
-   "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_16F", "CUDA"
-   "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_16F", "CUDA"
-   "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_TF32", "CUDA"
-   "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_TF32_FAST", "CUDA"
+   :header: "Input A/B", "Input C", "Output D", "Compute type", "Backend"
+   "HIP_R_32F", "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_16F", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F", "HIP / CUDA"
+   "HIP_R_16F", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_16F", "CUDA"
+   "HIP_R_16BF", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "HIP / CUDA"
+   "HIP_R_8I", "HIP_R_8I", "HIP_R_8I", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA"
+   "HIP_R_8I", "HIP_R_32I", "HIP_R_32I", "HIPSPARSELT_COMPUTE_32I", "CUDA"
+   "HIP_R_8I", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA"
+   "HIP_R_8I", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32I", "HIP / CUDA"
+   "HIP_R_8F_E4M3", "HIP_R_16F", "HIP_R_8F_E4M3", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E4M3", "HIP_R_16BF", "HIP_R_8F_E4M3", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E4M3", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E4M3", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E4M3", "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E5M2", "HIP_R_16F", "HIP_R_8F_E5M2", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E5M2", "HIP_R_16BF", "HIP_R_8F_E5M2", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E5M2", "HIP_R_16F", "HIP_R_16F", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E5M2", "HIP_R_16BF", "HIP_R_16BF", "HIPSPARSELT_COMPUTE_32F", "CUDA"
+   "HIP_R_8F_E5M2", "HIP_R_32F", "HIP_R_32F", "HIPSPARSELT_COMPUTE_32F", "CUDA"
diff --git a/docs/reference/supported-functions.rst b/docs/reference/supported-functions.rst
index c35c529b..3e9dc27c 100644
--- a/docs/reference/supported-functions.rst
+++ b/docs/reference/supported-functions.rst
@@ -48,4 +48,4 @@ ROCm & CUDA supported functions

 * CUDA

-  * Support cuSPARSELt v0.4
+  * Support cuSPARSELt v0.6.3
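As an illustration of the new support matrix, the sketch below sets up
descriptors for one of the FP8 rows ("HIP_R_8F_E4M3" for A/B, "HIP_R_16F"
for C/D, "HIPSPARSELT_COMPUTE_32F"), which the table marks CUDA-only. This is
a hypothetical example, not part of the patch: the sizes, leading dimensions,
column order, and 16-byte alignment are placeholders, error handling is
omitted, and the real shape/layout constraints are those documented for
cuSPARSELt v0.6.3 (note the 8-bit cases in utility.hpp above, which expect
multiples of 32 for the sparse operand).

    #include <hipsparselt.h>
    #include <cstdint>

    // Build descriptors for D = A * B + C with A structured (50% sparsity),
    // A/B in FP8 E4M3, C/D in FP16, accumulating in FP32.
    hipsparseStatus_t build_fp8_descriptors(const hipsparseLtHandle_t*     handle,
                                            hipsparseLtMatDescriptor_t*    matA,
                                            hipsparseLtMatDescriptor_t*    matB,
                                            hipsparseLtMatDescriptor_t*    matC,
                                            hipsparseLtMatmulDescriptor_t* matmul)
    {
        const int64_t m = 64, n = 64, k = 64;

        // A is the pruned (structured) operand.
        hipsparseLtStructuredDescriptorInit(handle, matA, m, k, /*ld=*/m, 16,
                                            HIP_R_8F_E4M3, HIPSPARSE_ORDER_COL,
                                            HIPSPARSELT_SPARSITY_50_PERCENT);
        hipsparseLtDenseDescriptorInit(handle, matB, k, n, /*ld=*/k, 16,
                                       HIP_R_8F_E4M3, HIPSPARSE_ORDER_COL);
        hipsparseLtDenseDescriptorInit(handle, matC, m, n, /*ld=*/m, 16,
                                       HIP_R_16F, HIPSPARSE_ORDER_COL);
        // C and D share a descriptor here; the compute type comes from the
        // "HIPSPARSELT_COMPUTE_32F" column of the table.
        return hipsparseLtMatmulDescriptorInit(handle, matmul,
                                               HIPSPARSE_OPERATION_NON_TRANSPOSE,
                                               HIPSPARSE_OPERATION_NON_TRANSPOSE,
                                               matA, matB, matC, matC,
                                               HIPSPARSELT_COMPUTE_32F);
    }

From here the usual flow applies (algorithm selection, plan creation, and
hipsparseLtMatmul), unchanged by this patch series.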