![]() |
CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <shared_load_iterator.h>
Public Types | |
| using | ThreadMap = ThreadMap_ |
| using | Shape = typename ThreadMap::Shape |
| using | Element = Element_ |
| using | Layout = layout::RowMajor |
| using | TensorRef = TensorRef< Element, Layout > |
| using | ConstTensorRef = typename TensorRef::ConstTensorRef |
| using | Index = typename Layout::Index |
| using | LongIndex = typename Layout::LongIndex |
| using | TensorCoord = MatrixCoord |
| using | Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess > |
| Fragment object. More... | |
| using | AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess, kAlignment > |
| Memory access size. More... | |
Public Member Functions | |
| CUTLASS_DEVICE | SharedLoadIterator (TensorRef ref, int thread_idx) |
| Constructor. More... | |
| CUTLASS_HOST_DEVICE void | add_pointer_offset (LongIndex pointer_offset) |
| Adds a pointer offset in units of Element. More... | |
| CUTLASS_DEVICE void | add_tile_offset (TensorCoord const &offset) |
| CUTLASS_DEVICE void | load_with_pointer_offset (Fragment &frag, Index pointer_offset) |
| Loads a fragment from memory. More... | |
| CUTLASS_DEVICE void | load (Fragment &frag) |
| Loads a fragment. More... | |
Static Public Attributes | |
| static int const | kElementsPerAccess = ThreadMap::kElementsPerAccess |
| static int const | kMinAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8 |
| static int const | kAlignment = (MaxAlignment < kMinAlignment ? MaxAlignment : kMinAlignment) |
| static int const | kThreads = ThreadMap::kThreads |
Tile iterator used to load the output tile from shared memory in the epilogue.
Satisfies: ReadableTileIterator
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess, kAlignment> |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::ConstTensorRef = typename TensorRef::ConstTensorRef |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Element = Element_ |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess> |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Index = typename Layout::Index |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Layout = layout::RowMajor |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::LongIndex = typename Layout::LongIndex |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Shape = typename ThreadMap::Shape |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::TensorCoord = MatrixCoord |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::TensorRef = TensorRef<Element, Layout> |
| using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::ThreadMap = ThreadMap_ |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
static |
|
static |
|
static |
|
static |
1.8.11