FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference

FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference

Composable Kernel: ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference
ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Struct Template Reference

#include <flatmm_pipeline_agmem_bgmem_creg_v1.hpp>

Public Types

using ADataType = remove_cvref_t<typename Problem::ADataType>
using BDataType = remove_cvref_t<typename Problem::BDataType>
using CDataType = remove_cvref_t<typename Problem::CDataType>
using BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>
using ALayout = remove_cvref_t<typename Problem::ALayout>
using BLayout = remove_cvref_t<typename Problem::BLayout>
using CLayout = remove_cvref_t<typename Problem::CLayout>
using BlockFlatmm
using WG = remove_cvref_t<decltype(config.template at<0>())>
using BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile>
using BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps>
using WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile>

Public Member Functions

template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename AElementFunction>
CK_TILE_HOST_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp>
CK_TILE_DEVICE auto operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const

Static Public Member Functions

static constexpr index_t GetVectorSizeA ()
static constexpr index_t GetVectorSizeB ()
static constexpr index_t GetVectorSizeC ()
static CK_TILE_HOST const std::string GetName ()
static CK_TILE_HOST_DEVICE constexpr auto TransposeC ()
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize ()
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM (index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler ()
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler ()

Static Public Attributes

static constexpr auto config
static constexpr index_t DsWritePreIssue = 3
static constexpr index_t DsReadPreload = 2
static constexpr index_t BlockSize = Problem::kBlockSize
static constexpr index_t WaveSize = get_warp_size()
static constexpr index_t kMPerBlock = BlockGemmShape::kM
static constexpr index_t kNPerBlock = BlockGemmShape::kN
static constexpr index_t kKPerBlock = BlockGemmShape::kK
static constexpr index_t flatKPerWarp = BlockGemmShape::flatKPerWarp
static constexpr index_t flatNPerWarp = BlockGemmShape::flatNPerWarp
static constexpr bool kPadM = Problem::kPadM
static constexpr bool kPadN = Problem::kPadN
static constexpr bool kPadK = Problem::kPadK
static constexpr index_t kLdsAlignmentInBytes = 16
static constexpr index_t NumWaveGroups = Problem::NumWaveGroups
static constexpr bool UsePersistentKernel = Problem::Traits::UsePersistentKernel
static constexpr auto I0 = number<0>()
static constexpr auto I1 = number<1>()
static constexpr auto I2 = number<2>()
static constexpr auto idxM = I0
static constexpr auto idxN = I1
static constexpr auto idxK = I2
static constexpr index_t MWarp = config.template at<1>()
static constexpr index_t NWarp = config.template at<2>()
static constexpr index_t MIterPerWarp = kMPerBlock / (MWarp * WG::kM)
static constexpr index_t NIterPerWarp = kNPerBlock / (NWarp * WG::kN)
static constexpr index_t KIterPerWarp = kKPerBlock / WG::kK
static constexpr index_t KFlatPerBlockPerIter = flatKPerWarp
static constexpr index_t NFlatPerBlockPerIter = flatNPerWarp
static constexpr index_t MPerBlockPerIter = kMPerBlock / MIterPerWarp
static constexpr index_t KPerBlockPerIter = kKPerBlock / KIterPerWarp
static constexpr index_t K1 = Problem::VectorLoadSize / sizeof(ADataType)
static constexpr index_t m_preload
static constexpr bool HasHotLoop = Problem::HasHotLoop
static constexpr auto TailNum = Problem::TailNum
static constexpr index_t mfma_per_wg = 1
static constexpr index_t dsread_per_wg
static constexpr index_t dsread_num_perK = dsread_per_wg * MIterPerWarp
static constexpr index_t dswrite_num_perK = dsread_num_perK / (MWarp * NWarp)
static constexpr index_t dswrite_rep = (dswrite_num_perK + MIterPerWarp - 1) / MIterPerWarp
static constexpr index_t Aload_num_perK = dswrite_num_perK
static constexpr index_t Aload_rep = dswrite_rep
static constexpr index_t Bload_num_perK = kNPerBlock * WG::kK / NWarp / K1 / WaveSize
static constexpr index_t HalfMIter = (MIterPerWarp + 1) / 2
static constexpr index_t Bload_rep = (Bload_num_perK + HalfMIter - 1) / HalfMIter
static constexpr index_t mfma_perM_perK = NIterPerWarp * mfma_per_wg
static constexpr index_t dswrite_mIter = (DsWritePreIssue - 1) % MIterPerWarp
static constexpr index_t dswrite_kIter = (DsWritePreIssue - 1) / MIterPerWarp
static constexpr bool DoubleSmemBuffer = false

Member Typedef Documentation

◆ ADataType

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ADataType = remove_cvref_t<typename Problem::ADataType>

◆ ALayout

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::ALayout = remove_cvref_t<typename Problem::ALayout>

◆ BDataType

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BDataType = remove_cvref_t<typename Problem::BDataType>

◆ BLayout

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BLayout = remove_cvref_t<typename Problem::BLayout>

◆ BlockFlatmm

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockFlatmm
Initial value:
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21

◆ BlockGemmShape

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape>

◆ BlockTile

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockTile = remove_cvref_t<typename BlockGemmShape::BlockTile>

◆ BlockWarps

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockWarps = remove_cvref_t<typename BlockGemmShape::BlockWarps>

◆ CDataType

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::CDataType = remove_cvref_t<typename Problem::CDataType>

◆ CLayout

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::CLayout = remove_cvref_t<typename Problem::CLayout>

◆ WarpTile

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WarpTile = remove_cvref_t<typename BlockGemmShape::WarpTile>

◆ WG

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
using ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WG = remove_cvref_t<decltype(config.template at<0>())>

Member Function Documentation

◆ GetName()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST const std::string ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetName ( )
inline static nodiscard

◆ GetSmemSize()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetSmemSize ( )
inline static constexpr

◆ GetVectorSizeA()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
constexpr index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetVectorSizeA ( )
inline static constexpr

◆ GetVectorSizeB()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
constexpr index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetVectorSizeB ( )
inline static constexpr

◆ GetVectorSizeC()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
constexpr index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::GetVectorSizeC ( )
inline static constexpr

◆ HotLoopScheduler()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::HotLoopScheduler ( )
inline static constexpr

◆ Last2ndHotLoopScheduler()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Last2ndHotLoopScheduler ( )
inline static constexpr

◆ LastHotLoopScheduler()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::LastHotLoopScheduler ( )
inline static constexpr

◆ operator()() [1/2]

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename AElementFunction>
CK_TILE_HOST_DEVICE auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const AElementFunction & a_element_func,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
index_t num_loop,
void * p_smem_ping,
void * p_smem_pong ) const
inline

◆ operator()() [2/2]

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp>
CK_TILE_DEVICE auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::operator() ( const ADramBlockWindowTmp & a_dram_block_window_tmp,
const BFlatBlockWindowTmp & b_flat_dram_block_window_tmp,
index_t num_loop,
void * p_smem_ping,
void * p_smem_pong ) const
inline

◆ SchedulerPerM()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::SchedulerPerM ( index_t dsread_perM,
index_t dswrite_perM,
index_t load_perM )
inline static constexpr

◆ TransposeC()

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::TransposeC ( )
inline static constexpr

Member Data Documentation

◆ Aload_num_perK

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Aload_num_perK = dswrite_num_perK
static constexpr

◆ Aload_rep

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Aload_rep = dswrite_rep
static constexpr

◆ Bload_num_perK

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Bload_num_perK = kNPerBlock * WG::kK / NWarp / K1 / WaveSize
static constexpr

◆ Bload_rep

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::Bload_rep = (Bload_num_perK + HalfMIter - 1) / HalfMIter
static constexpr

◆ BlockSize

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::BlockSize = Problem::kBlockSize
static constexpr

◆ config

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::config
static constexpr
Initial value:
=
BlockFlatmm::BlockPolicy::template GetWarpGemmMWarpNWarp<Problem>()

◆ DoubleSmemBuffer

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
bool ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::DoubleSmemBuffer = false
static constexpr

◆ dsread_num_perK

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dsread_num_perK = dsread_per_wg * MIterPerWarp
static constexpr

◆ dsread_per_wg

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dsread_per_wg
static constexpr
Initial value:
=
WG::kM * WG::kK * sizeof(ADataType) / WaveSize / Problem::VectorLoadSize
remove_cvref_t< typename Problem::ADataType > ADataType
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:48
static constexpr index_t WaveSize
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:69

◆ DsReadPreload

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::DsReadPreload = 2
static constexpr

◆ dswrite_kIter

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_kIter = (DsWritePreIssue - 1) / MIterPerWarp
static constexpr

◆ dswrite_mIter

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_mIter = (DsWritePreIssue - 1) % MIterPerWarp
static constexpr

◆ dswrite_num_perK

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_num_perK = dsread_num_perK / (MWarp * NWarp)
static constexpr

◆ dswrite_rep

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::dswrite_rep = (dswrite_num_perK + MIterPerWarp - 1) / MIterPerWarp
static constexpr

◆ DsWritePreIssue

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::DsWritePreIssue = 3
static constexpr

◆ flatKPerWarp

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::flatKPerWarp = BlockGemmShape::flatKPerWarp
static constexpr

◆ flatNPerWarp

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::flatNPerWarp = BlockGemmShape::flatNPerWarp
static constexpr

◆ HalfMIter

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::HalfMIter = (MIterPerWarp + 1) / 2
static constexpr

◆ HasHotLoop

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
bool ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::HasHotLoop = Problem::HasHotLoop
static constexpr

◆ I0

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::I0 = number<0>()
static constexpr

◆ I1

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::I1 = number<1>()
static constexpr

◆ I2

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::I2 = number<2>()
static constexpr

◆ idxK

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::idxK = I2
static constexpr

◆ idxM

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::idxM = I0
static constexpr

◆ idxN

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::idxN = I1
static constexpr

◆ K1

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::K1 = Problem::VectorLoadSize / sizeof(ADataType)
static constexpr

◆ KFlatPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KFlatPerBlockPerIter = flatKPerWarp
static constexpr

◆ KIterPerWarp

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KIterPerWarp = kKPerBlock / WG::kK
static constexpr

◆ kKPerBlock

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kKPerBlock = BlockGemmShape::kK
static constexpr

◆ kLdsAlignmentInBytes

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kLdsAlignmentInBytes = 16
static constexpr

◆ kMPerBlock

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kMPerBlock = BlockGemmShape::kM
static constexpr

◆ kNPerBlock

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kNPerBlock = BlockGemmShape::kN
static constexpr

◆ kPadK

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
bool ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kPadK = Problem::kPadK
static constexpr

◆ kPadM

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
bool ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kPadM = Problem::kPadM
static constexpr

◆ kPadN

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
bool ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::kPadN = Problem::kPadN
static constexpr

◆ KPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::KPerBlockPerIter = kKPerBlock / KIterPerWarp
static constexpr

◆ m_preload

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::m_preload
static constexpr
Initial value:
static constexpr index_t MIterPerWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:103
static constexpr index_t KIterPerWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:105
static constexpr index_t DsReadPreload
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:66

◆ mfma_per_wg

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::mfma_per_wg = 1
static constexpr

◆ mfma_perM_perK

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::mfma_perM_perK = NIterPerWarp * mfma_per_wg
static constexpr

◆ MIterPerWarp

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::MIterPerWarp = kMPerBlock / (MWarp * WG::kM)
static constexpr

◆ MPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::MPerBlockPerIter = kMPerBlock / MIterPerWarp
static constexpr

◆ MWarp

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::MWarp = config.template at<1>()
static constexpr

◆ NFlatPerBlockPerIter

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NFlatPerBlockPerIter = flatNPerWarp
static constexpr

◆ NIterPerWarp

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NIterPerWarp = kNPerBlock / (NWarp * WG::kN)
static constexpr

◆ NumWaveGroups

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NumWaveGroups = Problem::NumWaveGroups
static constexpr

◆ NWarp

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::NWarp = config.template at<2>()
static constexpr

◆ TailNum

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
auto ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::TailNum = Problem::TailNum
static constexpr

◆ UsePersistentKernel

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
bool ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::UsePersistentKernel = Problem::Traits::UsePersistentKernel
static constexpr

◆ WaveSize

template<typename Problem, typename PipelinePolicy = UniversalFlatmmPipelineAgBgCrPolicy>
index_t ck_tile::FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy >::WaveSize = get_warp_size()
static constexpr

The documentation for this struct was generated from the following file: