48   typename OperatorShape, 
    59   typename OperatorShape_, 
    66   using OperatorShape = OperatorShape_;
    67   using Element = Element_;
    79     Policy::kRowsPerIteration,
    86     Policy::OperatorCount::kColumn * Policy::kElementsPerAccess>;
    92   static int const kIterations = Policy::kIterations;
    96     static int const kLanesInQuad = 4;
   102     Detail::kLanesInQuad * Policy::kElementsPerAccess>;
   131     pointer_(reinterpret_cast<
AccessType *>(ref.data())),
   132     layout_(ref.stride()[0] / 
Policy::kElementsPerAccess) { 
   134     int quad_id = (lane_id / Detail::kLanesInQuad); 
   135     int lane_in_quad = (lane_id % Detail::kLanesInQuad);
   137     pointer_ += layout_({quad_id, lane_in_quad});
   143     pointer_ += pointer_offset / Policy::kElementsPerAccess;
   151     pointer_ += layout_({
   152       tile_offset.
row() * Shape::kRow, 
   153       (tile_offset.
column() * Shape::kColumn / Policy::kElementsPerAccess)
   162     add_tile_offset(tile_offset);
   173     for (
int n = 0; n < Policy::OperatorCount::kColumn; ++n) {
   174       pointer_[n * Detail::kLanesInQuad + pointer_offset / Policy::kElementsPerAccess] = frag_ptr[n];
   181     store_with_pointer_offset(frag, 0);
   191     for (
int n = 0; n < Policy::OperatorCount::kColumn; ++n) {
   192       frag_ptr[n] = pointer_[n * Detail::kLanesInQuad + pointer_offset / Policy::kElementsPerAccess];
   199     load_with_pointer_offset(frag, 0);
 Describes the size of a matrix tile. 
Definition: matrix_shape.h:42
CUTLASS_HOST_DEVICE Index const & column() const 
Returns the column of the coordinate. 
Definition: matrix_coord.h:85
Definition: aligned_buffer.h:35
Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of TensorOp instructions, of which a row-oriented slice is visible per iteration. 
CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset) const 
Load. 
Definition: tile_iterator_tensor_op.h:186
CUTLASS_HOST_DEVICE TileIteratorTensorOp(TensorRef const &ref, unsigned lane_id)
Constructor from TensorRef. 
Definition: tile_iterator_tensor_op.h:127
WarpShape_ WarpShape
Definition: tile_iterator_tensor_op.h:65
CUTLASS_HOST_DEVICE TileIteratorTensorOp & add_pointer_offset(Index pointer_offset)
Adds a pointer offset. 
Definition: tile_iterator_tensor_op.h:142
CUTLASS_HOST_DEVICE TileIteratorTensorOp & add_tile_offset(TensorCoord const &tile_offset)
Advances in units of whole tiles along the logical coordinate space of the tensor.
Definition: tile_iterator_tensor_op.h:149
Aligned array type. 
Definition: array.h:511
CUTLASS_HOST_DEVICE Index const & row() const 
Returns the row of the coordinate. 
Definition: matrix_coord.h:77
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Store. 
Definition: tile_iterator_tensor_op.h:168
CUTLASS_HOST_DEVICE TileIteratorTensorOp & operator+=(TensorCoord const &tile_offset)
Definition: tile_iterator_tensor_op.h:161
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Store. 
Definition: tile_iterator_tensor_op.h:180
Policy details related to the epilogue. 
Definition: tensor_op_policy.h:50
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Template for reading and writing tiles of accumulators to shared memory. 
Definition: tile_iterator_tensor_op.h:52
typename TensorRef::LongIndex LongIndex
Definition: tile_iterator_tensor_op.h:73
typename TensorRef::Index Index
Definition: tile_iterator_tensor_op.h:72
typename Layout::Index Index
Index type. 
Definition: tensor_ref.h:165
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE TileIteratorTensorOp()
Default constructor. 
Definition: tile_iterator_tensor_op.h:123
Array< Element, Policy::OperatorCount::kColumn *Policy::kElementsPerAccess > Fragment
This is the fragment size produced by one access of the iterator. 
Definition: tile_iterator_tensor_op.h:86
Defines layout functions used by TensorRef and derived classes. 
Defines layout functions used by TensorRef and derived classes for pitch-linear memory. 
CUTLASS_HOST_DEVICE void load(Fragment &frag) const 
Load. 
Definition: tile_iterator_tensor_op.h:198
Definition: matrix_coord.h:39
typename Layout::LongIndex LongIndex
Long index used for pointer offsets. 
Definition: tensor_ref.h:168