SharedMemTrait Struct Reference
ck::GridwiseFpAintBGemm_Wmma< BlockSize, ADataType, BDataType, ScaleDataType, AccDataType, CShuffleDataType, CDataType, CGlobalMemoryDataOperation, AGridDesc, BGridDesc, ScaleGridDesc, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, AEnableLds, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BEnableLds, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer >::SharedMemTrait Struct Reference
#include <gridwise_fpAintB_gemm_wmma.hpp>
Public Types | |
| using | LDS_ADataType = ADataType |
| using | LDS_BDataType = ADataType |
| using | LDS_CDataType = CShuffleDataType |
Static Public Attributes | |
| static constexpr auto | max_lds_align = K1 |
| static constexpr auto | a_block_space_size_aligned |
| static constexpr auto | b_block_space_size_aligned |
| static constexpr auto | a_block_space_offset = 0 |
| static constexpr auto | b_block_space_offset |
| static constexpr auto | c_shuffle_block_space_size |
| static constexpr auto | c_shuffle_block_space_offset = 0 |
| static constexpr auto | lds_size |
Member Typedef Documentation
◆ LDS_ADataType
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| using ck::GridwiseFpAintBGemm_Wmma< BlockSize, ADataType, BDataType, ScaleDataType, AccDataType, CShuffleDataType, CDataType, CGlobalMemoryDataOperation, AGridDesc, BGridDesc, ScaleGridDesc, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, AEnableLds, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BEnableLds, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer >::SharedMemTrait::LDS_ADataType = ADataType |
◆ LDS_BDataType
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| using ck::GridwiseFpAintBGemm_Wmma< BlockSize, ADataType, BDataType, ScaleDataType, AccDataType, CShuffleDataType, CDataType, CGlobalMemoryDataOperation, AGridDesc, BGridDesc, ScaleGridDesc, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, AEnableLds, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BEnableLds, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer >::SharedMemTrait::LDS_BDataType = ADataType |
◆ LDS_CDataType
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| using ck::GridwiseFpAintBGemm_Wmma< BlockSize, ADataType, BDataType, ScaleDataType, AccDataType, CShuffleDataType, CDataType, CGlobalMemoryDataOperation, AGridDesc, BGridDesc, ScaleGridDesc, CGridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, KPerBlock, MPerWmma, NPerWmma, K1Value, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, AEnableLds, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BEnableLds, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, NumGemmKPrefetchStage, LoopSched, PipelineVer >::SharedMemTrait::LDS_CDataType = CShuffleDataType |
Member Data Documentation
◆ a_block_space_offset
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| static constexpr auto a_block_space_offset = 0 |
static constexpr |
◆ a_block_space_size_aligned
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| static constexpr auto a_block_space_size_aligned |
static constexpr |
Initial value:
=
AEnableLds ? math::integer_least_multiple(MakeABlockDescriptor().GetElementSpaceSize(),
max_lds_align)
: 0
__host__ __device__ constexpr auto integer_least_multiple(X x, Y y)
Definition utility/math.hpp:78
static constexpr auto max_lds_align
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:504
__host__ static __device__ constexpr auto MakeABlockDescriptor()
Definition gridwise_fpAintB_gemm_wmma.hpp:163
◆ b_block_space_offset
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
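| static constexpr auto b_block_space_offset |
static constexpr |
Initial value:
=
(a_block_space_offset + a_block_space_size_aligned) * sizeof(LDS_ADataType) /
sizeof(LDS_BDataType)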
static constexpr auto a_block_space_size_aligned
Definition gridwise_fpAintB_gemm_wmma.hpp:570
ADataType LDS_BDataType
Definition gridwise_fpAintB_gemm_wmma.hpp:566
ADataType LDS_ADataType
Definition gridwise_fpAintB_gemm_wmma.hpp:565
static constexpr auto a_block_space_offset
Definition gridwise_fpAintB_gemm_wmma.hpp:579
◆ b_block_space_size_aligned
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| static constexpr auto b_block_space_size_aligned |
static constexpr |
Initial value:
=
BEnableLds ? math::integer_least_multiple(MakeBBlockDescriptor().GetElementSpaceSize(),
max_lds_align)
: 0
static constexpr auto max_lds_align
Definition gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp:328
__host__ static __device__ constexpr auto MakeBBlockDescriptor()
Definition gridwise_fpAintB_gemm_wmma.hpp:210
◆ c_shuffle_block_space_offset
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| static constexpr auto c_shuffle_block_space_offset = 0 |
static constexpr |
◆ c_shuffle_block_space_size
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
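| static constexpr auto c_shuffle_block_space_size |
static constexpr |
Initial value:
=
GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat()
.GetElementSpaceSize()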
__host__ static __device__ constexpr auto GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat()
Definition gridwise_fpAintB_gemm_wmma.hpp:407
◆ lds_size
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
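| static constexpr auto lds_size |
static constexpr |
Initial value:
=
math::max(c_shuffle_block_space_size * sizeof(LDS_CDataType),
a_block_space_size_aligned * sizeof(LDS_ADataType) +
b_block_space_size_aligned * sizeof(LDS_BDataType))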
static constexpr auto c_shuffle_block_space_size
Definition gridwise_fpAintB_gemm_wmma.hpp:587
static constexpr auto b_block_space_size_aligned
Definition gridwise_fpAintB_gemm_wmma.hpp:574
CShuffleDataType LDS_CDataType
Definition gridwise_fpAintB_gemm_wmma.hpp:567
◆ max_lds_align
template<index_t BlockSize, typename ADataType, typename BDataType, typename ScaleDataType, typename AccDataType, typename CShuffleDataType, typename CDataType, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename BGridDesc, typename ScaleGridDesc, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MPerWmma, index_t NPerWmma, index_t K1Value, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool AEnableLds, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BEnableLds, bool BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, index_t NumGemmKPrefetchStage = 1, LoopScheduler LoopSched = make_default_loop_scheduler(), PipelineVersion PipelineVer = PipelineVersion::weight_only>
| static constexpr auto max_lds_align = K1 |
static constexpr |
The documentation for this struct was generated from the following file: