44 namespace threadblock {
   131   bool Transpose = 
false   144 template <
typename Shape_, 
typename Element_, 
int AdvanceRank, 
typename ThreadMap_, 
bool Transpose_>
   148       AdvanceRank == 0 || AdvanceRank == 1,
   149       "Specialization for pitch-linear iterator may along advance along the "   150       "contiguous(rank=0) or strided(rank=1) dimension.");
   153   using Element = Element_;
   155   static int const kAdvanceRank = AdvanceRank;
   156   using ThreadMap = ThreadMap_;
   170   struct alignas((ThreadMap::kElementsPerAccess * sizeof_bits<Element>::value /
   174     Array<Element, ThreadMap::kElementsPerAccess> storage;
   176     static int const kElements = ThreadMap::kElementsPerAccess;
   181   static bool const transpose = Transpose_;
   186                                    ThreadMap, AccessType>;
   189   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount *
   190                                                ThreadMap::ThreadAccessShape::kCount>;
   193   using Mask = 
typename TileAccessIterator::Mask;
   202     typename TileAccessIterator::Params params_;
   207     Params(Layout 
const &layout) : params_(layout) { }
   215   using BytePointer = 
char *;
   231       Params 
const ¶ms,
   240       : address_iterator_(params.params_, pointer, extent, thread_id,
   241                           threadblock_offset) {}
   246       Params 
const ¶ms,  
   251       : PredicatedTileIterator2dThreadTile(params, pointer, extent, thread_id,
   256   void add_pointer_offset(
LongIndex pointer_offset) {
   257     address_iterator_.add_pointer_offset(pointer_offset);
   269       address_iterator_.add_tile_offset({0, 1});
   271       address_iterator_.add_tile_offset({1, 0});
   291   void clear_mask() { address_iterator_.clear_mask(); }
   295   void enable_mask() { address_iterator_.enable_mask(); }
   299   void set_mask(
Mask const &mask) { address_iterator_.set_mask(mask); }
   303   void get_mask(
Mask &mask) { address_iterator_.get_mask(mask); }
   307   void load_with_pointer_offset(
Fragment &frag, 
Index pointer_offset) {
   309     AccessType *frag_ptr = 
reinterpret_cast<AccessType *
>(&frag);
   312     for (
int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
   314       for (
int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
   316         for (
int ts = 0; ts < ThreadMap::ThreadAccessShape::kStrided; ts++){
   318           int access_idx = ts + c * ThreadMap::ThreadAccessShape::kStrided  + \
   319               s * ThreadMap::Iterations::kContiguous * ThreadMap::ThreadAccessShape::kStrided;
   321           address_iterator_.set_iteration_index(access_idx);
   322           if (address_iterator_.valid()) {
   324             frag_ptr[access_idx] =
   325                 *(address_iterator_.get() + pointer_offset);
   335       t.transform(frag, frag);
   341   void load(
Fragment &frag) { load_with_pointer_offset(frag, 0); }
   345   void store_with_pointer_offset(
Fragment const &frag, 
Index pointer_offset) {
   347     AccessType 
const *frag_ptr = 
reinterpret_cast<AccessType 
const *
>(&frag);
   350     for (
int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {
   352       for (
int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {
   354         for (
int ts = 0; ts < ThreadMap::ThreadAccessShape::kStrided; ts++){
   356           int access_idx = ts + c * ThreadMap::ThreadAccessShape::kStrided  + \
   357               s * ThreadMap::Iterations::kContiguous * ThreadMap::ThreadAccessShape::kStrided;
   359           address_iterator_.set_iteration_index(access_idx);
   360           if (address_iterator_.valid()) {
   361             *(address_iterator_.get() + pointer_offset) = frag_ptr[access_idx];
   371   void store(
Fragment const &frag) { store_with_pointer_offset(frag, 0); }
   394     "Specialization for pitch-linear iterator may along advance along the "   395     "contiguous(rank=0) or strided(rank=1) dimension.");
   398   using Element = Element_;
   400   static int const kAdvanceRank = AdvanceRank;
   401   using ThreadMap = ThreadMap_;
   402   static bool const Transpose = Transpose_;
   418     (kAdvanceRank == 0 ? 0 : 1),
   423   using AccessType = 
typename UnderlyingIterator::AccessType;
   426   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount * ThreadMap::ThreadAccessShape::kCount>;
   429   using Mask = 
typename UnderlyingIterator::Mask;
   438     typename UnderlyingIterator::Params params_;
   447     Params(Layout 
const &layout): params_(layout::PitchLinear(layout.stride(0))) {
   467     Params 
const ¶ms,                         
   476       layout::PitchLinearCoord(extent.row(), extent.column()),
   478       layout::PitchLinearCoord(threadblock_offset.row(), threadblock_offset.column())
   484     Params 
const ¶ms,                         
   488   ): PredicatedTileIterator2dThreadTile(params, pointer, extent, thread_id, 
make_Coord(0, 0)) { }
   492   void add_pointer_offset(
LongIndex pointer_offset) {
   493     iterator_.add_pointer_offset(pointer_offset);
   522     iterator_.clear_mask();
   528     iterator_.enable_mask();
   533   void set_mask(
Mask const &mask) {
   534     iterator_.set_mask(mask);
   539   void get_mask(
Mask &mask) {
   540     iterator_.get_mask(mask);
   545   void load_with_pointer_offset(
Fragment &frag, 
Index pointer_offset) {
   546     iterator_.load_with_pointer_offset(frag, pointer_offset);
   552     load_with_pointer_offset(frag, 0);
   557   void store_with_pointer_offset(
Fragment const &frag, 
Index pointer_offset) {
   558     iterator_.store_with_pointer_offset(frag, pointer_offset);
   564     store_with_pointer_offset(frag, 0);
   588     "Specialization for pitch-linear iterator may along advance along the "   589     "contiguous(rank=0) or strided(rank=1) dimension.");
   592   using Element = Element_;
   594   static int const kAdvanceRank = AdvanceRank;
   595   using ThreadMap = ThreadMap_;
   596   static bool const Transpose = Transpose_;
   612     (kAdvanceRank == 0 ? 1 : 0),
   617   using AccessType = 
typename UnderlyingIterator::AccessType;
   620   using Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount * ThreadMap::ThreadAccessShape::kCount>;
   623   using Mask = 
typename UnderlyingIterator::Mask;
   632     typename UnderlyingIterator::Params params_;
   641     Params(Layout 
const &layout): params_(layout::PitchLinear(layout.stride(0))) {
   661     Params 
const ¶ms,                         
   670       layout::PitchLinearCoord(extent.column(), extent.row()),
   672       layout::PitchLinearCoord(threadblock_offset.column(), threadblock_offset.row())
   678     Params 
const ¶ms,                         
   682   ): PredicatedTileIterator2dThreadTile(params, pointer, extent, thread_id, 
make_Coord(0, 0)) { }
   686   void add_pointer_offset(
LongIndex pointer_offset) {
   687     iterator_.add_pointer_offset(pointer_offset);
   716     iterator_.clear_mask();
   722     iterator_.enable_mask();
   727   void set_mask(
Mask const &mask) {
   728     iterator_.set_mask(mask);
   733   void get_mask(
Mask &mask) {
   734     iterator_.get_mask(mask);
   739   void load_with_pointer_offset(
Fragment &frag, 
Index pointer_offset) {
   740     iterator_.load_with_pointer_offset(frag, pointer_offset);
   746     load_with_pointer_offset(frag, 0);
   751   void store_with_pointer_offset(
Fragment const &frag, 
Index pointer_offset) {
   752     iterator_.store_with_pointer_offset(frag, pointer_offset);
   758     store_with_pointer_offset(frag, 0);
 int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:62
Definition: aligned_buffer.h:35
Coordinate in pitch-linear space. 
Definition: pitch_linear.h:52
Basic copy routines for tensor views. 
Mapping function for pitch-linear memory. 
Definition: pitch_linear.h:163
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 2-element coordinate. 
Definition: coord.h:387
int64_t LongIndex
Long index type used for offsets. 
Definition: layout/matrix.h:154
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
Template defining a shape used by pitch-linear operators. 
Definition: pitch_linear.h:43
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:59
CUTLASS_HOST_DEVICE half_t & operator++(half_t &lhs)
Definition: half.h:694
int64_t LongIndex
Long index type used for offsets. 
Definition: pitch_linear.h:175
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
int32_t Index
Index type used for coordinates. 
Definition: pitch_linear.h:172
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
int32_t Index
Index type used for coordinates. 
Definition: layout/matrix.h:151
Templates calculating the address and predicates to the load of tiles from pitch-linear rank=2 tensor...
Definition: matrix_coord.h:39