|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
#include <default_mma_core_sm50.h>
| Static Public Attributes | |
| static int const | kWarpSize = warp::WarpSize<arch::OpClassTensorOp>::value | 
| Number of threads per warp.  More... | |
| static int const | kThreads = WarpCount::kCount * kWarpSize | 
| Number of threads total.  More... | |
Partial specialization:
A: column-major B: row-major InstructionShape: 1-by-1-by-1 Operator: SIMT
This uses the default warp-level operator given tile sizes
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::ElementA = ElementA_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::ElementB = ElementB_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::ElementC = ElementC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::InstructionShape = InstructionShape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::IteratorThreadMapA = transform::PitchLinearStripminedThreadMap< layout::PitchLinearShape<Shape::kM, Shape::kK>, kThreads, 1 > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::IteratorThreadMapB = transform::PitchLinearStripminedThreadMap< layout::PitchLinearShape<Shape::kN, Shape::kK>, kThreads, 1 > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::LayoutA = layout::ColumnMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::LayoutB = layout::RowMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::LayoutC = LayoutC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::MmaPolicy = MmaPolicy< WarpMma, MatrixShape<0, 0>, MatrixShape<0, 0>, WarpCount::kK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::OperatorClass = arch::OpClassSimt | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::Shape = Shape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kM, Shape::kK>, ElementA, SmemLayoutA, 1, IteratorThreadMapA > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kK, Shape::kN>, ElementB, SmemLayoutB, 0, IteratorThreadMapB > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemLayoutA = layout::ColumnMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemLayoutB = layout::RowMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::WarpCount = GemmShape< Shape::kM / WarpShape::kM, Shape::kN / WarpShape::kN, Shape::kK / WarpShape::kK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::WarpMma = cutlass::gemm::warp::MmaSimt< WarpShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, warp::MmaSimtPolicy< MatrixShape<4, 8>, layout::RowMajorInterleaved<2>, GemmShape< 128 / sizeof_bits<ElementA>::value, 128 / sizeof_bits<ElementB>::value, 1> > > > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::WarpShape = WarpShape_ | 
| 
 | static | 
| 
 | static | 
 1.8.11
 1.8.11