From fd6e7a61a16a17fa155cbd717de0c79001af71e6 Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Mon, 23 Sep 2019 11:18:56 -0700
Subject: [PATCH] Fix CUDA version detection in CUB

This fixes the problem with CUB using deprecated shfl/vote instructions when
CUB is compiled with clang (e.g. some TensorFlow builds).
---
 cub/util_arch.cuh | 3 ++-
 cub/util_type.cuh | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/cub/util_arch.cuh b/cub/util_arch.cuh
index 87c5ea2fb..9ad9d1cbb 100644
--- a/cub/util_arch.cuh
+++ b/cub/util_arch.cuh
@@ -44,7 +44,8 @@ namespace cub {
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document
 
-#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
+#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \
+    (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
     #define CUB_USE_COOPERATIVE_GROUPS
 #endif
 
diff --git a/cub/util_type.cuh b/cub/util_type.cuh
index 0ba41e1ed..b2433d735 100644
--- a/cub/util_type.cuh
+++ b/cub/util_type.cuh
@@ -37,7 +37,7 @@
 #include <limits>
 #include <cfloat>
 
-#if (__CUDACC_VER_MAJOR__ >= 9)
+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
     #include <cuda_fp16.h>
 #endif
 
@@ -1063,7 +1063,7 @@ struct FpLimits
 };
 
 
-#if (__CUDACC_VER_MAJOR__ >= 9)
+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
 template <>
 struct FpLimits<__half>
 {