54 namespace threadblock {
    61 template<
typename WarpShape>
    63     return (WarpShape::kM > WarpShape::kN) ? 8 : 4;
    68   return (size_in_bits >= 32 ?
    69       threads / crosswise / (size_in_bits / 32) :
    70       threads / crosswise * (32 / size_in_bits)
   103                       ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_
   115   static int const PartitionsK = Shape::kK / WarpShape::kK;
   122     Shape::kM / WarpShape::kM,
   123     Shape::kN / WarpShape::kN,
   129     !(Shape::kM % WarpShape::kM) &&
   130     !(Shape::kN % WarpShape::kN),
   131     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."   138   static int const kThreads = WarpCount::kCount * kWarpSize;
   140   static int const kElementsPerAccess = 1;
   190   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
   191   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
   192   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
   193   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
   194   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
   195       "WarpShape must be divisible by ThreadTile shape.");
   196   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
   259                       ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_
   264   using ElementA = ElementA_;
   266   using ElementB = ElementB_;
   271   static int const PartitionsK = Shape::kK / WarpShape::kK;
   278     Shape::kM / WarpShape::kM,
   279     Shape::kN / WarpShape::kN,
   285     !(Shape::kM % WarpShape::kM) &&
   286     !(Shape::kN % WarpShape::kN),
   287     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."   294   static int const kThreads = WarpCount::kCount * kWarpSize;
   296   static int const kElementsPerAccess = 1;
   321     MatrixShape<Shape::kM, Shape::kK>, 
   340     MatrixShape<Shape::kK, Shape::kN>, 
   352   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
   353   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
   354   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
   355   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
   356   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
   357       "WarpShape must be divisible by ThreadTile shape.");
   358   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
   425                       LayoutC_, arch::OpClassSimt, 2, Operator_
   430   using ElementA = ElementA_;
   432   using ElementB = ElementB_;
   437   static int const PartitionsK = Shape::kK / WarpShape::kK;
   444     Shape::kM / WarpShape::kM,
   445     Shape::kN / WarpShape::kN,
   451     !(Shape::kM % WarpShape::kM) &&
   452     !(Shape::kN % WarpShape::kN),
   453     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."   460   static int const kThreads = WarpCount::kCount * kWarpSize;
   462   static int const kElementsPerAccess = 1;
   477     layout::PitchLinearShape<Shape::kK, Shape::kM>,
   487     MatrixShape<Shape::kM, Shape::kK>, 
   496     layout::PitchLinearShape<Shape::kN, Shape::kK>,
   503     MatrixShape<Shape::kK, Shape::kN>, 
   515   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
   516   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
   517   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
   518   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
   519   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
   520       "WarpShape must be divisible by ThreadTile shape.");
   521   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
   587                       ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_
   592   using ElementA = ElementA_;
   594   using ElementB = ElementB_;
   599   static int const PartitionsK = Shape::kK / WarpShape::kK;
   606     Shape::kM / WarpShape::kM,
   607     Shape::kN / WarpShape::kN,
   613     !(Shape::kM % WarpShape::kM) &&
   614     !(Shape::kN % WarpShape::kN),
   615     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."   622   static int const kThreads = WarpCount::kCount * kWarpSize;
   624   static int const kElementsPerAccess = 1;
   639     layout::PitchLinearShape<Shape::kM, Shape::kK>,
   646     MatrixShape<Shape::kM, Shape::kK>, 
   655     layout::PitchLinearShape<Shape::kK, Shape::kN>,
   665     MatrixShape<Shape::kK, Shape::kN>, 
   677   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
   678   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
   679   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
   680   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
   681   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
   682       "WarpShape must be divisible by ThreadTile shape.");
   683   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
   745                       LayoutC_, arch::OpClassSimt, 2, Operator_
   751   using ElementA = int8_t;
   753   using ElementB = int8_t;
   758   static int const PartitionsK = Shape::kK / WarpShape::kK;
   765     Shape::kM / WarpShape::kM,
   766     Shape::kN / WarpShape::kN,
   772     !(Shape::kM % WarpShape::kM) &&
   773     !(Shape::kN % WarpShape::kN),
   774     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."   781   static int const kThreads = WarpCount::kCount * kWarpSize;
   796     layout::PitchLinearShape<Shape::kM, Shape::kK>,
   803     MatrixShape<Shape::kM, Shape::kK>, 
   813     layout::PitchLinearShape<Shape::kN, Shape::kK>,
   820     MatrixShape<Shape::kK, Shape::kN>, 
   832   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
   833   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
   834   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
   835   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
   836   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
   837       "WarpShape must be divisible by ThreadTile shape.");
   838   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
   899                       LayoutC_, arch::OpClassSimt, 2, Operator_
   905   using ElementA = int8_t;
   907   using ElementB = int8_t;
   912   static int const PartitionsK = Shape::kK / WarpShape::kK;
   919     Shape::kM / WarpShape::kM,
   920     Shape::kN / WarpShape::kN,
   926     !(Shape::kM % WarpShape::kM) &&
   927     !(Shape::kN % WarpShape::kN),
   928     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."   935   static int const kThreads = WarpCount::kCount * kWarpSize;
   950     layout::PitchLinearShape<Shape::kK, Shape::kM>,
   960     MatrixShape<Shape::kM, Shape::kK>, 
   970     layout::PitchLinearShape<Shape::kK, Shape::kN>,
   980     MatrixShape<Shape::kK, Shape::kN>, 
   992   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
   993   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
   994   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
   995   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
   996   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
   997       "WarpShape must be divisible by ThreadTile shape.");
   998   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
  1053     typename WarpShape_,
  1062                       LayoutC_, arch::OpClassSimt, 2, Operator_
  1068   using ElementA = int8_t;
  1070   using ElementB = int8_t;
  1075   static int const PartitionsK = Shape::kK / WarpShape::kK;
  1082     Shape::kM / WarpShape::kM,
  1083     Shape::kN / WarpShape::kN,
  1089     !(Shape::kM % WarpShape::kM) &&
  1090     !(Shape::kN % WarpShape::kN),
  1091     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."  1098   static int const kThreads = WarpCount::kCount * kWarpSize;
  1113     layout::PitchLinearShape<Shape::kK, Shape::kM>,
  1123     MatrixShape<Shape::kM, Shape::kK>, 
  1132     layout::PitchLinearShape<Shape::kN, Shape::kK>,
  1139     MatrixShape<Shape::kK, Shape::kN>, 
  1151   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
  1152   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
  1153   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
  1154   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
  1155   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
  1156       "WarpShape must be divisible by ThreadTile shape.");
  1157   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
  1212     typename WarpShape_,
  1221                       LayoutC_, arch::OpClassSimt, 2, Operator_
  1227   using ElementA = int8_t;
  1229   using ElementB = int8_t;
  1234   static int const PartitionsK = Shape::kK / WarpShape::kK;
  1241     Shape::kM / WarpShape::kM,
  1242     Shape::kN / WarpShape::kN,
  1248     !(Shape::kM % WarpShape::kM) &&
  1249     !(Shape::kN % WarpShape::kN),
  1250     "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."  1257   static int const kThreads = WarpCount::kCount * kWarpSize;
  1272     layout::PitchLinearShape<Shape::kM, Shape::kK>,
  1279     MatrixShape<Shape::kM, Shape::kK>, 
  1289     layout::PitchLinearShape<Shape::kK, Shape::kN>,
  1299     MatrixShape<Shape::kK, Shape::kN>, 
  1311   static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
  1312   static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
  1313   static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
  1314   static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
  1315   static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
  1316       "WarpShape must be divisible by ThreadTile shape.");
  1317   static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
 cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:756
Describes the lane policy used by warp-level matrix multiply operators targeting SIMT instructions...
Describes the size of a matrix tile. 
Definition: matrix_shape.h:42
Templates implementing loading of tiles from pitch-linear rank=2 tensors. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingM, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_simt.h:1195
Definition: aligned_buffer.h:35
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_simt.h:108
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:435
Query the number of threads per warp. 
Definition: gemm/warp/mma.h:43
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:755
Definition: default_mma_core.h:90
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_simt.h:719
Templates implementing how threads are mapped to a given tile. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:1225
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:1232
Structure to compute the matrix product targeting CUDA cores and SIMT math instructions. 
Definition: mma_simt.h:74
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:749
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:1237
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:1078
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:1074
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:436
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:113
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:761
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:910
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:1073
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:902
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:114
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:270
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:748
Describes the arrangement and configuration of per-lane operations in warp-level matrix multiply...
Definition: mma_simt_policy.h:46
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_simt.h:873
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:106
Defines a Shape template for matrix tiles. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:1231
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:105
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingM, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_simt.h:1036
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:262
Defines the size of an element in bits. 
Definition: numeric_types.h:42
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:1233
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:597
Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the glo...
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:434
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:903
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:118
Top-level include for all CUTLASS numeric types. 
Shape of a matrix multiply-add operation. 
Definition: include/cutlass/gemm/gemm.h:57
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:602
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:428
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:1072
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:598
constexpr int simt_transpose_padding(int threads, int crosswise, int size_in_bits)
Computes padding in shared memory to perform efficient transpose without bank conflicts. 
Definition: default_mma_core_simt.h:67
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:1066
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:261
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:589
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_simt.h:1355
Templates implementing loading of tiles from pitch-linear rank=2 tensors. 
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
constexpr int simt_get_warp_threads_m()
Definition: default_mma_core_simt.h:62
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:909
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:112
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:1065
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Used for partial specialization. 
Definition: default_mma_core_simt.h:229
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:911
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:596
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:440
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingN, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_simt.h:395
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:427
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_simt.h:110
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:268
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:274
Templates implementing warp-level matrix multiply-accumulate operations. 
CUTLASS_HOST_DEVICE constexpr int const_min(int a, int b)
Definition: fast_math.h:219
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator. 
Definition: default_mma_core_simt.h:915
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingM, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined. 
Definition: default_mma_core_simt.h:557
Basic include for CUTLASS. 
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:269
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:757
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:1224
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:590
Definition: layout/matrix.h:237