47   static bool const value = ((N & (N - 1)) == 0);
    53 template <
int N, 
int CurrentVal = N, 
int Count = 0>
    60 template <
int N, 
int Count>
    68 template <
int N, 
int CurrentVal = N, 
int Count = 0>
    75 template <
int N, 
int Count>
    77   enum { 
value = ((1 << Count) < N) ? Count + 1 : Count };
    92 template <
int Div
idend, 
int Divisor>
    94   enum { 
value = Dividend / Divisor };
    96   static_assert((Dividend % Divisor == 0), 
"Not an even multiple");
   106 template <
typename div
idend_t, 
typename divisor_t>
   108   return ((dividend + divisor - 1) / divisor) * divisor;
   114 template <
typename value_t>
   117     if (a == 0) 
return b;
   119     if (b == 0) 
return a;
   127 template <
typename value_t>
   129   value_t temp = 
gcd(a, b);
   131   return temp ? (a / temp * b) : 0;
   139 template <
typename value_t>
   141   for (
int i = 31; i >= 0; --i) {
   142     if ((1 << i) & x) 
return 31 - i;
   147 template <
typename value_t>
   149   int a = int(31 - 
clz(x));
   150   a += (x & (x - 1)) != 0;  
   159 void find_divisor(
unsigned int& mul, 
unsigned int& shr, 
unsigned int denom) {
   165     unsigned m = unsigned(((1ull << p) + 
unsigned(denom) - 1) / 
unsigned(denom));
   176 void fast_divmod(
int& quo, 
int& rem, 
int src, 
int div, 
unsigned int mul, 
unsigned int shr) {
   178   #if defined(__CUDA_ARCH__)   180   quo = (div != 1) ? __umulhi(src, mul) >> shr : src;
   182   quo = int((div != 1) ? 
int(src * mul) >> shr : src);
   186   rem = src - (quo * div);
   192 void fast_divmod(
int& quo, int64_t& rem, int64_t src, 
int div, 
unsigned int mul, 
unsigned int shr) {
   194   #if defined(__CUDA_ARCH__)   196   quo = (div != 1) ? __umulhi(src, mul) >> shr : src;
   198   quo = int((div != 1) ? (src * mul) >> shr : src);
   201   rem = src - (quo * div);
   208 template <
int A, 
int B>
   210   static int const kValue = (A < B) ? A : B;
   213 template <
int A, 
int B>
   215   static int const kValue = (A > B) ? A : B;
   220     return (b < a ? b : a);
   225     return (b > a ? b : a);
 Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE void fast_divmod(int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr)
Definition: fast_math.h:176
CUTLASS_HOST_DEVICE value_t find_log2(value_t x)
Definition: fast_math.h:148
Definition: fast_math.h:54
Definition: fast_math.h:209
CUTLASS_HOST_DEVICE constexpr int const_max(int a, int b)
Definition: fast_math.h:224
static bool const value
Definition: fast_math.h:47
CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b)
Definition: fast_math.h:128
CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor)
Definition: fast_math.h:107
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Definition: fast_math.h:214
CUTLASS_HOST_DEVICE void find_divisor(unsigned int &mul, unsigned int &shr, unsigned int denom)
Definition: fast_math.h:159
CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b)
Definition: fast_math.h:115
Definition: fast_math.h:93
Definition: fast_math.h:69
CUTLASS_HOST_DEVICE value_t clz(value_t x)
Definition: fast_math.h:140
Definition: fast_math.h:46
CUTLASS_HOST_DEVICE constexpr int const_min(int a, int b)
Definition: fast_math.h:219
Basic include for CUTLASS. 
Definition: fast_math.h:84