LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ > Struct Template Reference

LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ > Struct Template Reference

Composable Kernel: ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ > Struct Template Reference
ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ > Struct Template Reference

#include <block_fmha_pipeline_qx_ks_vs_custom_policy.hpp>

Public Types

using type = remove_cvref_t<decltype(Make())>

Static Public Member Functions

static constexpr auto Make ()

Static Public Attributes

static constexpr index_t num_lds_buffers_ = max(k_prefetches_, v_prefetches_)
static constexpr index_t ceil_ = ((v_loops_ - 1) / num_lds_buffers_) * num_lds_buffers_

Member Typedef Documentation

◆ type

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<index_t k_prefetches_, index_t v_prefetches_, index_t k_loops_, index_t v_loops_>
using ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ >::type = remove_cvref_t<decltype(Make())>

Member Function Documentation

◆ Make()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<index_t k_prefetches_, index_t v_prefetches_, index_t k_loops_, index_t v_loops_>
constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ >::Make ( )
inline static constexpr

Member Data Documentation

◆ ceil_

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<index_t k_prefetches_, index_t v_prefetches_, index_t k_loops_, index_t v_loops_>
index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ >::ceil_ = ((v_loops_ - 1) / num_lds_buffers_) * num_lds_buffers_
static constexpr

◆ num_lds_buffers_

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<index_t k_prefetches_, index_t v_prefetches_, index_t k_loops_, index_t v_loops_>
index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::LdsBufferSequence< k_prefetches_, v_prefetches_, k_loops_, v_loops_ >::num_lds_buffers_ = max(k_prefetches_, v_prefetches_)
static constexpr

The documentation for this struct was generated from the following file: