![]() |
CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Template that handles conventional layouts for IDP4A.
#include <mma_sm61.h>
Public Types | |
| using | Shape = Shape_ |
| Size of the Gemm problem - concept: gemm::GemmShape<> More... | |
| using | ElementA = int8_t |
| Data type of operand A. More... | |
| using | LayoutA = layout::ColumnMajor |
| Layout of A matrix (concept: layout::MapFunc) More... | |
| using | ElementB = int8_t |
| Data type of operand B. More... | |
| using | LayoutB = layout::RowMajor |
| Layout of B matrix (concept: layout::MapFunc) More... | |
| using | ElementC = int32_t |
| Element type of operand C. More... | |
| using | LayoutC = LayoutC_ |
| Layout of C matrix (concept: layout::MapFunc) More... | |
| using | Operator = arch::OpMultiplyAdd |
| Underlying mathematical operator. More... | |
| using | FragmentA = Array< ElementA, Shape::kMK > |
| A operand storage. More... | |
| using | FragmentB = Array< ElementB, Shape::kKN > |
| B operand storage. More... | |
| using | FragmentC = Array< ElementC, Shape::kMN > |
| C operand storage. More... | |
Public Member Functions | |
| CUTLASS_HOST_DEVICE void | operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C) |
| Computes a matrix product D = A * B + C. More... | |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementA = int8_t |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementB = int8_t |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementC = int32_t |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentA = Array<ElementA, Shape::kMK> |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentB = Array<ElementB, Shape::kKN> |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentC = Array<ElementC, Shape::kMN> |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutA = layout::ColumnMajor |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutB = layout::RowMajor |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutC = LayoutC_ |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Operator = arch::OpMultiplyAdd |
| using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Shape = Shape_ |
|
inline |
Uses a 1x1x4 IDP4A sequence for the bulk of the computation.
1.8.11