44 namespace threadblock {
   131   int AccessSize = ThreadMap::kElementsPerAccess
   144 template <
typename Shape_, 
typename Element_, 
int AdvanceRank,
   145           typename ThreadMap_, 
int AccessSize>
   147                              ThreadMap_, AccessSize> {
   150       AdvanceRank == 0 || AdvanceRank == 1,
   151       "Specialization for pitch-linear iterator may along advance along the "   152       "contiguous(rank=0) or strided(rank=1) dimension.");
   155   using Element = Element_;
   157   static int const kAdvanceRank = AdvanceRank;
   158   using ThreadMap = ThreadMap_;
   178   static int const kAccessesPerVector = TileAccessIterator::kAccessesPerVector;
   181   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount *
   182                                                ThreadMap::kElementsPerAccess>;
   185   using Mask = 
typename TileAccessIterator::Mask;
   194     typename TileAccessIterator::Params params_;
   199     Params(Layout 
const &layout) : params_(layout) { }
   207   using BytePointer = 
char *;
   223       Params 
const ¶ms,
   232       : address_iterator_(params.params_, pointer, extent, thread_id,
   233                           threadblock_offset) {}
   238       Params 
const ¶ms,  
   249     address_iterator_.add_pointer_offset(pointer_offset);
   261       address_iterator_.add_tile_offset({0, 1});
   263       address_iterator_.add_tile_offset({1, 0});
   300     AccessType *frag_ptr = 
reinterpret_cast<AccessType *
>(&frag);
   303     for (
int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
   305       for (
int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
   308         for (
int v = 0; v < kAccessesPerVector; ++v) {
   310           int idx = v + kAccessesPerVector * (c + s * ThreadMap::Iterations::kContiguous);
   312           address_iterator_.set_iteration_index(idx);
   313           auto ptr = (address_iterator_.get() + pointer_offset);
   315           if (address_iterator_.valid()) {
   316               frag_ptr[idx] = *ptr;
   331     address_iterator_.set_iteration_index(0);
   332     AccessType 
const *frag_ptr = 
reinterpret_cast<AccessType 
const *
>(&frag);
   335     for (
int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
   337       for (
int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
   339         for (
int v = 0; v < kAccessesPerVector; ++v) {
   341           int idx = v + kAccessesPerVector * (c + s * ThreadMap::Iterations::kContiguous);
   343           if (address_iterator_.valid()) {
   344             *(address_iterator_.get() + pointer_offset) = frag_ptr[idx];
   377     "Specialization for pitch-linear iterator may along advance along the "   378     "contiguous(rank=0) or strided(rank=1) dimension.");
   381   using Element = Element_;
   383   static int const kAdvanceRank = AdvanceRank;
   384   using ThreadMap = ThreadMap_;
   400     (kAdvanceRank == 0 ? 0 : 1),
   408   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
   411   using Mask = 
typename UnderlyingIterator::Mask;
   420     typename UnderlyingIterator::Params params_;
   429     Params(Layout 
const &layout): params_(layout::PitchLinear(layout.stride(0))) {
   449     Params 
const ¶ms,                         
   458       layout::PitchLinearCoord(extent.row(), extent.column()),
   460       layout::PitchLinearCoord(threadblock_offset.row(), threadblock_offset.column())
   466     Params 
const ¶ms,                         
   475     iterator_.add_pointer_offset(pointer_offset);
   504     iterator_.clear_mask();
   510     iterator_.enable_mask();
   516     iterator_.set_mask(mask);
   522     iterator_.get_mask(mask);
   528     iterator_.load_with_pointer_offset(frag, pointer_offset);
   534     load_with_pointer_offset(frag, 0);
   540     iterator_.store_with_pointer_offset(frag, pointer_offset);
   546     store_with_pointer_offset(frag, 0);
   570     "Specialization for pitch-linear iterator may along advance along the "   571     "contiguous(rank=0) or strided(rank=1) dimension.");
   574   using Element = Element_;
   576   static int const kAdvanceRank = AdvanceRank;
   577   using ThreadMap = ThreadMap_;
   593     (kAdvanceRank == 0 ? 1 : 0),
   601   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;
   604   using Mask = 
typename UnderlyingIterator::Mask;
   613     typename UnderlyingIterator::Params params_;
   622     Params(Layout 
const &layout): params_(layout::PitchLinear(layout.stride(0))) {
   642     Params 
const ¶ms,                         
   651       layout::PitchLinearCoord(extent.column(), extent.row()),
   653       layout::PitchLinearCoord(threadblock_offset.column(), threadblock_offset.row())
   659     Params 
const ¶ms,                         
   668     iterator_.add_pointer_offset(pointer_offset);
   697     iterator_.clear_mask();
   703     iterator_.enable_mask();
   709     iterator_.set_mask(mask);
   715     iterator_.get_mask(mask);
   721     iterator_.load_with_pointer_offset(frag, pointer_offset);
   727     load_with_pointer_offset(frag, 0);
   733     iterator_.store_with_pointer_offset(frag, pointer_offset);
   739     store_with_pointer_offset(frag, 0);
   754 template <
typename Shape_, 
typename Element_, 
int AdvanceRank,
   755           typename ThreadMap_, 
int AccessSize, 
int InterleavedK>
   757                              layout::ColumnMajorInterleaved<InterleavedK>,
   758                              AdvanceRank, ThreadMap_, AccessSize> {
   761       AdvanceRank == 0 || AdvanceRank == 1,
   762       "Specialization for pitch-linear iterator may along advance along the "   763       "contiguous(rank=0) or strided(rank=1) dimension.");
   766   using Element = Element_;
   767   static int const kInterleavedK = InterleavedK;
   769   static int const kAdvanceRank = AdvanceRank;
   770   using ThreadMap = ThreadMap_;
   784                                Shape::kColumn / kInterleavedK>,
   791   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount *
   792                                                ThreadMap::kElementsPerAccess>;
   795   using Mask = 
typename UnderlyingIterator::Mask;
   803     typename UnderlyingIterator::Params params_;
   812         : params_(layout::PitchLinear(layout.stride(0))) {}
   829       Params 
const ¶ms,
   838       : iterator_(params.params_, pointer,
   839                   layout::PitchLinearCoord(extent.row() * kInterleavedK,
   840                                            extent.column() / kInterleavedK),
   842                   layout::PitchLinearCoord(
   843                       threadblock_offset.row() * kInterleavedK,
   844                       threadblock_offset.column() / kInterleavedK)) {}
   849       Params 
const ¶ms,  
   860     iterator_.add_pointer_offset(pointer_offset);
   907     iterator_.load_with_pointer_offset(frag, pointer_offset);
   917     iterator_.store_with_pointer_offset(frag, pointer_offset);
   935 template <
typename Shape_, 
typename Element_, 
int AdvanceRank,
   936           typename ThreadMap_, 
int AccessSize, 
int InterleavedK>
   938                              layout::RowMajorInterleaved<InterleavedK>,
   939                              AdvanceRank, ThreadMap_, AccessSize> {
   942       AdvanceRank == 0 || AdvanceRank == 1,
   943       "Specialization for pitch-linear iterator may along advance along the "   944       "contiguous(rank=0) or strided(rank=1) dimension.");
   947   using Element = Element_;
   948   static int const kInterleavedK = InterleavedK;
   950   static int const kAdvanceRank = AdvanceRank;
   951   using ThreadMap = ThreadMap_;
   965                                Shape::kRow / kInterleavedK>,
   972   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount *
   973                                                ThreadMap::kElementsPerAccess>;
   976   using Mask = 
typename UnderlyingIterator::Mask;
   984     typename UnderlyingIterator::Params params_;
   993         : params_(layout::PitchLinear(layout.stride(0))) {}
  1010       Params 
const ¶ms,
  1019       : iterator_(params.params_, pointer,
  1020                   layout::PitchLinearCoord(extent.column() * kInterleavedK,
  1021                                            extent.row() / kInterleavedK),
  1023                   layout::PitchLinearCoord(
  1024                       threadblock_offset.column() * kInterleavedK,
  1025                       threadblock_offset.row() / kInterleavedK)) {}
  1030       Params 
const ¶ms,  
  1041     iterator_.add_pointer_offset(pointer_offset);
  1088     iterator_.load_with_pointer_offset(frag, pointer_offset);
  1098     iterator_.store_with_pointer_offset(frag, pointer_offset);
 
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:62
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:355
Definition: aligned_buffer.h:35
Coordinate in pitch-linear space. 
Definition: pitch_linear.h:52
Architecture-specific operators on memory. 
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:249
Mapping function for pitch-linear memory. 
Definition: pitch_linear.h:163
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:352
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 2-element coordinate. 
Definition: coord.h:387
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:154
Aligned array type. 
Definition: array.h:511
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:246
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:59
CUTLASS_HOST_DEVICE half_t & operator++(half_t &lhs)
Definition: half.h:694
int64_t LongIndex
Long index type used for offsets. 
Definition: pitch_linear.h:175
Templates calculating the address and predicates to the load of tiles from pitch-linear rank=2 tensor...
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
int32_t Index
Index type used for coordinates. 
Definition: pitch_linear.h:172
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
Definition: layout/matrix.h:343
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:151
Definition: matrix_coord.h:39
Definition: layout/matrix.h:237