diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h | 505 |
1 files changed, 254 insertions, 251 deletions
diff --git a/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h b/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h index ef7e087b832c..11e1e7d03258 100644 --- a/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h +++ b/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h @@ -14,9 +14,6 @@ #endif #if !defined(__HIPCC_RTC__) -#if defined(__cplusplus) -#include <algorithm> -#endif #include <limits.h> #include <stdint.h> #ifdef __OPENMP_AMDGCN__ @@ -32,6 +29,17 @@ #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif +// Device library provides fast low precision and slow full-recision +// implementations for some functions. Which one gets selected depends on +// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if +// -ffast-math or -fgpu-approx-transcendentals are in effect. +#pragma push_macro("__FAST_OR_SLOW") +#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) +#define __FAST_OR_SLOW(fast, slow) fast +#else +#define __FAST_OR_SLOW(fast, slow) slow +#endif + // A few functions return bool type starting only in C++11. #pragma push_macro("__RETURN_TYPE") #ifdef __OPENMP_AMDGCN__ @@ -70,9 +78,9 @@ __DEVICE__ void __static_assert_equal_size() { #endif __DEVICE__ -uint64_t __make_mantissa_base8(const char *__tagp) { +uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; - while (__tagp) { + while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '7') @@ -87,9 +95,9 @@ uint64_t __make_mantissa_base8(const char *__tagp) { } __DEVICE__ -uint64_t __make_mantissa_base10(const char *__tagp) { +uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; - while (__tagp) { + while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '9') @@ -104,9 +112,9 @@ uint64_t __make_mantissa_base10(const char *__tagp) { } __DEVICE__ -uint64_t __make_mantissa_base16(const char *__tagp) { +uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; - while (__tagp) { + while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '9') @@ -125,10 +133,7 @@ uint64_t __make_mantissa_base16(const char *__tagp) { } __DEVICE__ -uint64_t __make_mantissa(const char *__tagp) { - if (!__tagp) - return 0u; - +uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) { if (*__tagp == '0') { ++__tagp; @@ -142,21 +147,180 @@ uint64_t __make_mantissa(const char *__tagp) { } // BEGIN FLOAT + +// BEGIN INTRINSICS + +__DEVICE__ +float __cosf(float __x) { return __ocml_native_cos_f32(__x); } + +__DEVICE__ +float __exp10f(float __x) { + const float __log2_10 = 0x1.a934f0p+1f; + return __builtin_amdgcn_exp2f(__log2_10 * __x); +} + +__DEVICE__ +float __expf(float __x) { + const float __log2_e = 0x1.715476p+0; + return __builtin_amdgcn_exp2f(__log2_e * __x); +} + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } +__DEVICE__ +float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } +__DEVICE__ +float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } +__DEVICE__ +float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fadd_rn(float __x, float __y) { return __x + __y; } +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } +__DEVICE__ +float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } +__DEVICE__ +float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } +__DEVICE__ +float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fdiv_rn(float __x, float __y) { return __x / __y; } +#endif + +__DEVICE__ +float __fdividef(float __x, float __y) { return __x / __y; } + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fmaf_rd(float __x, float __y, float __z) { + return __ocml_fma_rtn_f32(__x, __y, __z); +} +__DEVICE__ +float __fmaf_rn(float __x, float __y, float __z) { + return __ocml_fma_rte_f32(__x, __y, __z); +} +__DEVICE__ +float __fmaf_ru(float __x, float __y, float __z) { + return __ocml_fma_rtp_f32(__x, __y, __z); +} +__DEVICE__ +float __fmaf_rz(float __x, float __y, float __z) { + return __ocml_fma_rtz_f32(__x, __y, __z); +} +#else +__DEVICE__ +float __fmaf_rn(float __x, float __y, float __z) { + return __builtin_fmaf(__x, __y, __z); +} +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } +__DEVICE__ +float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } +__DEVICE__ +float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } +__DEVICE__ +float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fmul_rn(float __x, float __y) { return __x * __y; } +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } +__DEVICE__ +float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } +__DEVICE__ +float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } +__DEVICE__ +float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } +#else +__DEVICE__ +float __frcp_rn(float __x) { return 1.0f / __x; } +#endif + +__DEVICE__ +float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); } + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } +__DEVICE__ +float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } +__DEVICE__ +float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } +__DEVICE__ +float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } +#else +__DEVICE__ +float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } +__DEVICE__ +float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } +__DEVICE__ +float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } +__DEVICE__ +float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fsub_rn(float __x, float __y) { return __x - __y; } +#endif + +__DEVICE__ +float __log10f(float __x) { return __builtin_log10f(__x); } + +__DEVICE__ +float __log2f(float __x) { return __builtin_amdgcn_logf(__x); } + +__DEVICE__ +float __logf(float __x) { return __builtin_logf(__x); } + +__DEVICE__ +float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } + +__DEVICE__ +float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } + +__DEVICE__ +void __sincosf(float __x, float *__sinptr, float *__cosptr) { + *__sinptr = __ocml_native_sin_f32(__x); + *__cosptr = __ocml_native_cos_f32(__x); +} + +__DEVICE__ +float __sinf(float __x) { return __ocml_native_sin_f32(__x); } + +__DEVICE__ +float __tanf(float __x) { + return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x)); +} +// END INTRINSICS + #if defined(__cplusplus) __DEVICE__ int abs(int __x) { - int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; + return __builtin_abs(__x); } __DEVICE__ long labs(long __x) { - long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; + return __builtin_labs(__x); } __DEVICE__ long long llabs(long long __x) { - long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; + return __builtin_llabs(__x); } #endif @@ -185,13 +349,13 @@ __DEVICE__ float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } __DEVICE__ -float ceilf(float __x) { return __ocml_ceil_f32(__x); } +float ceilf(float __x) { return __builtin_ceilf(__x); } __DEVICE__ -float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); } +float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); } __DEVICE__ -float cosf(float __x) { return __ocml_cos_f32(__x); } +float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); } __DEVICE__ float coshf(float __x) { return __ocml_cosh_f32(__x); } @@ -224,16 +388,16 @@ __DEVICE__ float exp10f(float __x) { return __ocml_exp10_f32(__x); } __DEVICE__ -float exp2f(float __x) { return __ocml_exp2_f32(__x); } +float exp2f(float __x) { return __builtin_exp2f(__x); } __DEVICE__ -float expf(float __x) { return __ocml_exp_f32(__x); } +float expf(float __x) { return __builtin_expf(__x); } __DEVICE__ float expm1f(float __x) { return __ocml_expm1_f32(__x); } __DEVICE__ -float fabsf(float __x) { return __ocml_fabs_f32(__x); } +float fabsf(float __x) { return __builtin_fabsf(__x); } __DEVICE__ float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } @@ -242,33 +406,25 @@ __DEVICE__ float fdividef(float __x, float __y) { return __x / __y; } __DEVICE__ -float floorf(float __x) { return __ocml_floor_f32(__x); } +float floorf(float __x) { return __builtin_floorf(__x); } __DEVICE__ float fmaf(float __x, float __y, float __z) { - return __ocml_fma_f32(__x, __y, __z); + return __builtin_fmaf(__x, __y, __z); } __DEVICE__ -float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } +float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); } __DEVICE__ -float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } +float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); } __DEVICE__ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } __DEVICE__ float frexpf(float __x, int *__nptr) { - int __tmp; -#ifdef __OPENMP_AMDGCN__ -#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) -#endif - float __r = - __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); - *__nptr = __tmp; - - return __r; + return __builtin_frexpf(__x, __nptr); } __DEVICE__ @@ -278,13 +434,13 @@ __DEVICE__ int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } __DEVICE__ -__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); } +__RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); } __DEVICE__ -__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); } +__RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); } __DEVICE__ -__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); } +__RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); } __DEVICE__ float j0f(float __x) { return __ocml_j0_f32(__x); } @@ -314,37 +470,37 @@ float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication } __DEVICE__ -float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } +float ldexpf(float __x, int __e) { return __builtin_amdgcn_ldexpf(__x, __e); } __DEVICE__ float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } __DEVICE__ -long long int llrintf(float __x) { return __ocml_rint_f32(__x); } +long long int llrintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ -long long int llroundf(float __x) { return __ocml_round_f32(__x); } +long long int llroundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ -float log10f(float __x) { return __ocml_log10_f32(__x); } +float log10f(float __x) { return __builtin_log10f(__x); } __DEVICE__ float log1pf(float __x) { return __ocml_log1p_f32(__x); } __DEVICE__ -float log2f(float __x) { return __ocml_log2_f32(__x); } +float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); } __DEVICE__ float logbf(float __x) { return __ocml_logb_f32(__x); } __DEVICE__ -float logf(float __x) { return __ocml_log_f32(__x); } +float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); } __DEVICE__ -long int lrintf(float __x) { return __ocml_rint_f32(__x); } +long int lrintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ -long int lroundf(float __x) { return __ocml_round_f32(__x); } +long int lroundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float modff(float __x, float *__iptr) { @@ -359,7 +515,7 @@ float modff(float __x, float *__iptr) { } __DEVICE__ -float nanf(const char *__tagp) { +float nanf(const char *__tagp __attribute__((nonnull))) { union { float val; struct ieee_float { @@ -380,7 +536,7 @@ float nanf(const char *__tagp) { } __DEVICE__ -float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } +float nearbyintf(float __x) { return __builtin_nearbyintf(__x); } __DEVICE__ float nextafterf(float __x, float __y) { @@ -412,7 +568,7 @@ float normf(int __dim, ++__a; } - return __ocml_sqrt_f32(__r); + return __builtin_sqrtf(__r); } __DEVICE__ @@ -446,7 +602,7 @@ __DEVICE__ float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } __DEVICE__ -float rintf(float __x) { return __ocml_rint_f32(__x); } +float rintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ float rnorm3df(float __x, float __y, float __z) { @@ -471,22 +627,22 @@ float rnormf(int __dim, } __DEVICE__ -float roundf(float __x) { return __ocml_round_f32(__x); } +float roundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } __DEVICE__ float scalblnf(float __x, long int __n) { - return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n) + return (__n < INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n) : __ocml_scalb_f32(__x, __n); } __DEVICE__ -float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } +float scalbnf(float __x, int __n) { return __builtin_amdgcn_ldexpf(__x, __n); } __DEVICE__ -__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } +__RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); } __DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr) { @@ -494,9 +650,13 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) { #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif +#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__ + __sincosf(__x, __sinptr, __cosptr); +#else *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; +#endif } __DEVICE__ @@ -511,7 +671,7 @@ void sincospif(float __x, float *__sinptr, float *__cosptr) { } __DEVICE__ -float sinf(float __x) { return __ocml_sin_f32(__x); } +float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); } __DEVICE__ float sinhf(float __x) { return __ocml_sinh_f32(__x); } @@ -520,7 +680,7 @@ __DEVICE__ float sinpif(float __x) { return __ocml_sinpi_f32(__x); } __DEVICE__ -float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } +float sqrtf(float __x) { return __builtin_sqrtf(__x); } __DEVICE__ float tanf(float __x) { return __ocml_tan_f32(__x); } @@ -532,7 +692,7 @@ __DEVICE__ float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } __DEVICE__ -float truncf(float __x) { return __ocml_trunc_f32(__x); } +float truncf(float __x) { return __builtin_truncf(__x); } __DEVICE__ float y0f(float __x) { return __ocml_y0_f32(__x); } @@ -562,158 +722,7 @@ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication return __x1; } -// BEGIN INTRINSICS - -__DEVICE__ -float __cosf(float __x) { return __ocml_native_cos_f32(__x); } - -__DEVICE__ -float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } - -__DEVICE__ -float __expf(float __x) { return __ocml_native_exp_f32(__x); } - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } -__DEVICE__ -float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } -__DEVICE__ -float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } -__DEVICE__ -float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fadd_rn(float __x, float __y) { return __x + __y; } -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } -__DEVICE__ -float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } -__DEVICE__ -float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } -__DEVICE__ -float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fdiv_rn(float __x, float __y) { return __x / __y; } -#endif - -__DEVICE__ -float __fdividef(float __x, float __y) { return __x / __y; } - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fmaf_rd(float __x, float __y, float __z) { - return __ocml_fma_rtn_f32(__x, __y, __z); -} -__DEVICE__ -float __fmaf_rn(float __x, float __y, float __z) { - return __ocml_fma_rte_f32(__x, __y, __z); -} -__DEVICE__ -float __fmaf_ru(float __x, float __y, float __z) { - return __ocml_fma_rtp_f32(__x, __y, __z); -} -__DEVICE__ -float __fmaf_rz(float __x, float __y, float __z) { - return __ocml_fma_rtz_f32(__x, __y, __z); -} -#else -__DEVICE__ -float __fmaf_rn(float __x, float __y, float __z) { - return __ocml_fma_f32(__x, __y, __z); -} -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } -__DEVICE__ -float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } -__DEVICE__ -float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } -__DEVICE__ -float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fmul_rn(float __x, float __y) { return __x * __y; } -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } -__DEVICE__ -float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } -__DEVICE__ -float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } -__DEVICE__ -float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } -#else -__DEVICE__ -float __frcp_rn(float __x) { return 1.0f / __x; } -#endif - -__DEVICE__ -float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } -__DEVICE__ -float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } -__DEVICE__ -float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } -__DEVICE__ -float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } -#else -__DEVICE__ -float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } -#endif - -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } -__DEVICE__ -float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } -__DEVICE__ -float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } -__DEVICE__ -float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } -#else -__DEVICE__ -float __fsub_rn(float __x, float __y) { return __x - __y; } -#endif - -__DEVICE__ -float __log10f(float __x) { return __ocml_native_log10_f32(__x); } - -__DEVICE__ -float __log2f(float __x) { return __ocml_native_log2_f32(__x); } - -__DEVICE__ -float __logf(float __x) { return __ocml_native_log_f32(__x); } - -__DEVICE__ -float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } - -__DEVICE__ -float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } - -__DEVICE__ -void __sincosf(float __x, float *__sinptr, float *__cosptr) { - *__sinptr = __ocml_native_sin_f32(__x); - *__cosptr = __ocml_native_cos_f32(__x); -} - -__DEVICE__ -float __sinf(float __x) { return __ocml_native_sin_f32(__x); } -__DEVICE__ -float __tanf(float __x) { return __ocml_tan_f32(__x); } -// END INTRINSICS // END FLOAT // BEGIN DOUBLE @@ -742,11 +751,11 @@ __DEVICE__ double cbrt(double __x) { return __ocml_cbrt_f64(__x); } __DEVICE__ -double ceil(double __x) { return __ocml_ceil_f64(__x); } +double ceil(double __x) { return __builtin_ceil(__x); } __DEVICE__ double copysign(double __x, double __y) { - return __ocml_copysign_f64(__x, __y); + return __builtin_copysign(__x, __y); } __DEVICE__ @@ -792,38 +801,31 @@ __DEVICE__ double expm1(double __x) { return __ocml_expm1_f64(__x); } __DEVICE__ -double fabs(double __x) { return __ocml_fabs_f64(__x); } +double fabs(double __x) { return __builtin_fabs(__x); } __DEVICE__ double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } __DEVICE__ -double floor(double __x) { return __ocml_floor_f64(__x); } +double floor(double __x) { return __builtin_floor(__x); } __DEVICE__ double fma(double __x, double __y, double __z) { - return __ocml_fma_f64(__x, __y, __z); + return __builtin_fma(__x, __y, __z); } __DEVICE__ -double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } +double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); } __DEVICE__ -double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } +double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); } __DEVICE__ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } __DEVICE__ double frexp(double __x, int *__nptr) { - int __tmp; -#ifdef __OPENMP_AMDGCN__ -#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) -#endif - double __r = - __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); - *__nptr = __tmp; - return __r; + return __builtin_frexp(__x, __nptr); } __DEVICE__ @@ -833,13 +835,13 @@ __DEVICE__ int ilogb(double __x) { return __ocml_ilogb_f64(__x); } __DEVICE__ -__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); } +__RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); } __DEVICE__ -__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); } +__RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); } __DEVICE__ -__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); } +__RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); } __DEVICE__ double j0(double __x) { return __ocml_j0_f64(__x); } @@ -869,16 +871,16 @@ double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication } __DEVICE__ -double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); } +double ldexp(double __x, int __e) { return __builtin_amdgcn_ldexp(__x, __e); } __DEVICE__ double lgamma(double __x) { return __ocml_lgamma_f64(__x); } __DEVICE__ -long long int llrint(double __x) { return __ocml_rint_f64(__x); } +long long int llrint(double __x) { return __builtin_rint(__x); } __DEVICE__ -long long int llround(double __x) { return __ocml_round_f64(__x); } +long long int llround(double __x) { return __builtin_round(__x); } __DEVICE__ double log(double __x) { return __ocml_log_f64(__x); } @@ -896,10 +898,10 @@ __DEVICE__ double logb(double __x) { return __ocml_logb_f64(__x); } __DEVICE__ -long int lrint(double __x) { return __ocml_rint_f64(__x); } +long int lrint(double __x) { return __builtin_rint(__x); } __DEVICE__ -long int lround(double __x) { return __ocml_round_f64(__x); } +long int lround(double __x) { return __builtin_round(__x); } __DEVICE__ double modf(double __x, double *__iptr) { @@ -943,7 +945,7 @@ double nan(const char *__tagp) { } __DEVICE__ -double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } +double nearbyint(double __x) { return __builtin_nearbyint(__x); } __DEVICE__ double nextafter(double __x, double __y) { @@ -959,7 +961,7 @@ double norm(int __dim, ++__a; } - return __ocml_sqrt_f64(__r); + return __builtin_sqrt(__r); } __DEVICE__ @@ -1009,7 +1011,7 @@ __DEVICE__ double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } __DEVICE__ -double rint(double __x) { return __ocml_rint_f64(__x); } +double rint(double __x) { return __builtin_rint(__x); } __DEVICE__ double rnorm(int __dim, @@ -1034,21 +1036,21 @@ double rnorm4d(double __x, double __y, double __z, double __w) { } __DEVICE__ -double round(double __x) { return __ocml_round_f64(__x); } +double round(double __x) { return __builtin_round(__x); } __DEVICE__ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } __DEVICE__ double scalbln(double __x, long int __n) { - return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n) + return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n) : __ocml_scalb_f64(__x, __n); } __DEVICE__ -double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); } +double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); } __DEVICE__ -__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); } +__RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); } __DEVICE__ double sin(double __x) { return __ocml_sin_f64(__x); } @@ -1082,7 +1084,7 @@ __DEVICE__ double sinpi(double __x) { return __ocml_sinpi_f64(__x); } __DEVICE__ -double sqrt(double __x) { return __ocml_sqrt_f64(__x); } +double sqrt(double __x) { return __builtin_sqrt(__x); } __DEVICE__ double tan(double __x) { return __ocml_tan_f64(__x); } @@ -1094,7 +1096,7 @@ __DEVICE__ double tgamma(double __x) { return __ocml_tgamma_f64(__x); } __DEVICE__ -double trunc(double __x) { return __ocml_trunc_f64(__x); } +double trunc(double __x) { return __builtin_trunc(__x); } __DEVICE__ double y0(double __x) { return __ocml_y0_f64(__x); } @@ -1216,7 +1218,7 @@ __DEVICE__ double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } #else __DEVICE__ -double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } +double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS @@ -1261,7 +1263,7 @@ double __fma_rz(double __x, double __y, double __z) { #else __DEVICE__ double __fma_rn(double __x, double __y, double __z) { - return __ocml_fma_f64(__x, __y, __z); + return __builtin_fma(__x, __y, __z); } #endif // END INTRINSICS @@ -1293,29 +1295,30 @@ __DEVICE__ int max(int __arg1, int __arg2) { } __DEVICE__ -float max(float __x, float __y) { return fmaxf(__x, __y); } +float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); } __DEVICE__ -double max(double __x, double __y) { return fmax(__x, __y); } +double max(double __x, double __y) { return __builtin_fmax(__x, __y); } __DEVICE__ -float min(float __x, float __y) { return fminf(__x, __y); } +float min(float __x, float __y) { return __builtin_fminf(__x, __y); } __DEVICE__ -double min(double __x, double __y) { return fmin(__x, __y); } +double min(double __x, double __y) { return __builtin_fmin(__x, __y); } #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) __host__ inline static int min(int __arg1, int __arg2) { - return std::min(__arg1, __arg2); + return __arg1 < __arg2 ? __arg1 : __arg2; } __host__ inline static int max(int __arg1, int __arg2) { - return std::max(__arg1, __arg2); + return __arg1 > __arg2 ? __arg1 : __arg2; } #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) #endif #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") +#pragma pop_macro("__FAST_OR_SLOW") #endif // __CLANG_HIP_MATH_H__ |