#include <batched_reduction_traits.h>
|  | 
| typedef BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ > | This_ | 
|  | 
| typedef cutlass::reduction::BatchedReduction< This_ > | KernelClass | 
|  | The struct that consumes these traits.  More... 
 | 
|  | 
| typedef OutputTile_ | OutputTile | 
|  | 
| typedef SubTile_ | SubTile | 
|  | 
| typedef ThreadShape_ | ThreadShape | 
|  | 
| typedef ScalarA_ | ScalarA | 
|  | The input pointer type.  More... 
 | 
|  | 
| typedef ScalarC_ | ScalarC | 
|  | 
| typedef ScalarD_ | ScalarD | 
|  | The output pointer type.  More... 
 | 
|  | 
| typedef ScalarAlphaBeta_ | ScalarAlphaBeta | 
|  | The alpha beta type.  More... 
 | 
|  | 
| typedef ScalarAccum_ | ScalarAccum | 
|  | The type for accumulation.  More... 
 | 
|  | 
| typedef Index_ | Index | 
|  | The index type.  More... 
 | 
|  | 
| typedef BlockSwizzle_ | BlockSwizzle | 
|  | The thread block swizzle.  More... 
 | 
|  | 
| typedef Functor_ | Functor | 
|  | 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef BlockSwizzle_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::BlockSwizzle | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef Functor_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::Functor | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef Index_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::Index | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef cutlass::reduction::BatchedReduction<This_> cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::KernelClass | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef OutputTile_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::OutputTile | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef ScalarA_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarA | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef ScalarAccum_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarAccum | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef ScalarAlphaBeta_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarAlphaBeta | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef ScalarC_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarC | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef ScalarD_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarD | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef SubTile_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::SubTile | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef BatchedReductionTraits<ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_> cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::This_ | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
      
        
          | typedef ThreadShape_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ThreadShape | 
      
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
  
  | 
        
          | int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::kThreads = SubTile::kW / ThreadShape::kW |  | static | 
 
Parameters object constructible on the host. The number of threads per thread block; it can be deduced from the tile shapes as SubTile::kW / ThreadShape::kW. 
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
  
  | 
        
          | int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::maxInReg = maxInReg_ |  | static | 
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
  
  | 
        
          | int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::maxOutReg = maxOutReg_ |  | static | 
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
  
  | 
        
          | const int cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ReductionSize = ReductionSize_ |  | static | 
 
 
template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_  = Shape<1, 1, 128>, typename SubTile_  = Shape<1, 1, 64>, typename ThreadShape_  = Shape<1, 1, 2>, typename Index_  = int, typename BlockSwizzle_  = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_  = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >> 
  
  | 
        
          | const bool cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ThreadShapeMultiple2 = (ThreadShape::kW % 2 == 0) |  | static | 
 
 
The documentation for this struct was generated from the following file: