100         extent.
w() * extent.
c(),
   101         extent.
h() * extent.
w() * extent.
c()
   125     int n = 0, h = 0, w = 0, c = 0;
   127     #if defined(__CUDA_ARCH__)   129     c = int(index % static_cast<int>(stride_[0]));
   131     unsigned int hw_mul, hw_shr, w_mul, w_shr, c_mul, c_shr;
   137     fast_divmod(n, tmp, index, 
int(stride_[2]), hw_mul, hw_shr);
   138     fast_divmod(h, w, tmp, 
int(stride_[1]), w_mul, w_shr);
   139     fast_divmod(w, tmp, w, 
int(stride_[0]), c_mul, c_shr);
   142     n = int(index / (stride_[0] * stride_[1] * stride_[2]));
   143     LongIndex residual = index % (stride_[0] * stride_[1] * stride_[2]);
   145     h = int(residual / (stride_[0] * stride_[1]));
   146     residual = (residual % (stride_[0] * stride_[1]));
   148     w = int(residual / stride_[0]);
   149     c = int(residual % stride_[0]);
   173     if ((extent.
c() > stride_[0])
   174         || (extent.
w() * stride_[0] > stride_[1]) 
   175         || (extent.
h() * stride_[1] > stride_[2])) {
   178     return extent.
n() * stride_[2];
   189   static int const kRank = 4;
   192   static int const kStrideRank = 3;
   229         extent.
w() * extent.
h(),
   230         extent.
h() * extent.
w() * extent.
c()
   259     return extent.
n() * stride_[2];
   266 template <
int Interleave>
   271   static int const kInterleave = Interleave;
   274   static int const kRank = 4;
   277   static int const kStrideRank = 3;
   313         kInterleave * extent.
w(),
   314         kInterleave * extent.
w() * extent.
h(),
   315         extent.
h() * extent.
w() * extent.
c()
   324     Index c_minor = (coord.
c() % kInterleave);
   325     Index c_major = (coord.
c() / kInterleave);
   349     return extent.
n() * stride_[2];
   356 template <
int Interleave>
   361   static int const kInterleave = Interleave;
   364   static int const kRank = 4;
   367   static int const kStrideRank = 3;
   403         kInterleave * extent.
n(),
   404         kInterleave * extent.
n() * extent.
w(),
   405         kInterleave * extent.
n() * extent.
w() * extent.
h()
   414     Index c_minor = (coord.
c() % kInterleave);
   415     Index c_major = (coord.
c() / kInterleave);
   439     return (extent.
c() / kInterleave * stride_[2]);
 Coord< kStrideRank > Stride
Stride vector. 
Definition: tensor.h:71
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor.h:246
Defines a canonical 4D coordinate used by tensor operations. 
Definition: tensor_coord.h:38
CUTLASS_HOST_DEVICE TensorCxRSKx(Stride const &stride=Stride(0))
Constructor. 
Definition: tensor.h:396
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE void fast_divmod(int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr)
Definition: fast_math.h:176
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:348
CUTLASS_HOST_DEVICE TensorNCxHWx(Stride const &stride=Stride(0))
Constructor. 
Definition: tensor.h:306
static int const kStrideRank
Rank of stride vector. 
Definition: tensor.h:59
static CUTLASS_HOST_DEVICE TensorNCxHWx packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor.h:310
A Coord is a coordinate of arbitrary rank into a tensor or matrix. 
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate. 
Definition: coord.h:387
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Returns the offset of a coordinate in linear memory. 
Definition: tensor.h:412
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor.h:336
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:169
int32_t Index
Index type used for coordinates. 
Definition: tensor.h:62
Tensor4DCoord TensorCoord
Logical coordinate (n, h, w, c) 
Definition: tensor.h:68
Mapping function for 4-D NC/xHWx tensors. 
Definition: tensor.h:267
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor.h:198
CUTLASS_HOST_DEVICE Index const & w() const 
Returns the column of the coordinate. 
Definition: tensor_coord.h:95
CUTLASS_HOST_DEVICE TensorNHWC(Stride const &stride=Stride(0))
Constructor. 
Definition: tensor.h:88
int Index
Index type used to store elements. 
Definition: coord.h:55
static CUTLASS_HOST_DEVICE TensorNCHW packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor.h:225
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor.h:163
CUTLASS_HOST_DEVICE TensorNCHW(Stride const &stride=Stride(0))
Constructor. 
Definition: tensor.h:221
static int const kRank
Logical rank of tensor. 
Definition: tensor.h:56
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor.h:342
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:258
CUTLASS_HOST_DEVICE Index const & c() const 
Returns the channel of the coordinate. 
Definition: tensor_coord.h:103
int32_t Index
Index type used for coordinates. 
Definition: tensor.h:370
CUTLASS_HOST_DEVICE TensorNHWC(typename Stride::Index c, typename Stride::Index wc, typename Stride::Index hwc)
Constructor. 
Definition: tensor.h:92
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor.h:432
Defines a canonical coordinate for rank=4 tensors offering named indices. 
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor.h:426
Mapping function for 4-D CxRSKx tensors. 
Definition: tensor.h:357
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor.h:65
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
static CUTLASS_HOST_DEVICE TensorNHWC packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed NHWC tensor. 
Definition: tensor.h:96
Mapping function for 4-D NCHW tensors. 
Definition: tensor.h:186
CUTLASS_HOST_DEVICE Stride stride() const 
Returns the stride of the layout. 
Definition: tensor.h:157
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const 
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:438
static CUTLASS_HOST_DEVICE TensorCxRSKx packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor. 
Definition: tensor.h:400
int32_t Index
Index type used for coordinates. 
Definition: tensor.h:195
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Returns the offset of a coordinate in linear memory. 
Definition: tensor.h:237
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE Index const & n() const 
Returns the batch of the coordinate. 
Definition: tensor_coord.h:79
CUTLASS_HOST_DEVICE void find_divisor(unsigned int &mul, unsigned int &shr, unsigned int denom)
Definition: fast_math.h:159
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor.h:373
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout. 
Definition: tensor.h:252
Defines layout functions used by TensorRef and derived classes. 
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Returns the offset of a coordinate in linear memory. 
Definition: tensor.h:322
int64_t LongIndex
Long index type used for offsets. 
Definition: tensor.h:283
CUTLASS_HOST_DEVICE Index const & h() const 
Returns the row of the coordinate. 
Definition: tensor_coord.h:87
Mapping function for 4-D NHWC tensors. 
Definition: tensor.h:53
int32_t Index
Index type used for coordinates. 
Definition: tensor.h:280
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex index) const 
Returns the logical coordinate (n, h, w, c) from a given offset in linear memory. ...
Definition: tensor.h:123
Basic include for CUTLASS. 
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const 
Returns the offset of a coordinate (n, h, w, c) in linear memory. 
Definition: tensor.h:108