GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 > Struct Template Reference

GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 > Struct Template Reference

Composable Kernel: ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 > Struct Template Reference
ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 > Struct Template Reference

Class mapping 1D block index into 2D output tile space. More...

#include <gemm_tile_partitioner.hpp>

Public Types

using BlockGemmShape = remove_cvref_t<BlockGemmShapeType>

Public Member Functions

CK_TILE_HOST_DEVICE GemmSpatiallyLocalTilePartitioner () noexcept=delete
CK_TILE_HOST_DEVICE GemmSpatiallyLocalTilePartitioner (index_t M_, index_t N_) noexcept
CK_TILE_DEVICE auto GetOutputTileIndex (index_t block_1d_id) noexcept -> const tuple< index_t, index_t >
 Calculate workgroup 1D index mapping into 2D output C-tile space.

Static Public Member Functions

static CK_TILE_HOST_DEVICE auto GridSize (index_t M, index_t N) noexcept(noexcept(MPerBlock != 0 && NPerBlock != 0)) -> index_t
 Calculates GEMM kernel grid size.
static CK_TILE_HOST_DEVICE auto GetLoopNum (index_t K) noexcept -> index_t
 Calculate number of loop iterations over GEMM's K dimension.

Static Public Attributes

static constexpr index_t MPerBlock = BlockGemmShape::kM
static constexpr index_t NPerBlock = BlockGemmShape::kN
static constexpr index_t KPerBlock = BlockGemmShape::kK

Detailed Description

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
struct ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >

Class mapping 1D block index into 2D output tile space.

Note
It spatially groups workgroups in order to better utilize caches, using a grouped rows-of-column-vectors WGP pattern. It is optimized for gfx94x-like multi-die chips.
Template Parameters
GroupNum - The number of big groups.
M01 - The number of groups in the M dimension within spatially local WGPs.

Member Typedef Documentation

◆ BlockGemmShape

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
using ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::BlockGemmShape = remove_cvref_t<BlockGemmShapeType>

Constructor & Destructor Documentation

◆ GemmSpatiallyLocalTilePartitioner() [1/2]

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
CK_TILE_HOST_DEVICE ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::GemmSpatiallyLocalTilePartitioner ( )
delete noexcept

◆ GemmSpatiallyLocalTilePartitioner() [2/2]

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
CK_TILE_HOST_DEVICE ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::GemmSpatiallyLocalTilePartitioner ( index_t M_,
index_t N_ )
inline noexcept

Member Function Documentation

◆ GetLoopNum()

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
CK_TILE_HOST_DEVICE auto ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::GetLoopNum ( index_t K ) -> index_t
inline static noexcept

Calculate number of loop iterations over GEMM's K dimension.

Parameters
K - GEMM's K dimension.
Returns
index_t The number of loop iterations over K dimension.

◆ GetOutputTileIndex()

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
CK_TILE_DEVICE auto ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::GetOutputTileIndex ( index_t block_1d_id ) -> const tuple< index_t, index_t >
inline noexcept

Calculate workgroup 1D index mapping into 2D output C-tile space.

Parameters
[in] block_1d_id - WGP's index.
Returns
const tuple<index_t, index_t> Tuple containing 2D output C-tile index.
                   idxN0

      |<               mtx   N                 >|

        NPerBlock   NPerBlock   NPerBlock   NPerBlock
           N_0         N_1        N_2         N_3
  -   |-----------|-----------|-----------|-----|-----|-
  ^   | -   -  0  |/---->  2  |           |     |     |
      | |   |     /     |     |           |     |     |  M_0  MPerBlock
      | M   |    /|     |     |           |     |     |
      |-0---|---/-|-----|-----|-----------|-----|-----|-
      | 1   |  /  |     |     |  blockid  |     |     |
idxM0 | |   | /   |     V     |     5     |     |     |  M_1  MPerBlock
      | -   V  1  |     -  3  |           |     |     |
      |-----------|-----------|-----------|-----|-----|-
mtx M |           |           |           |     |     |
      |           |           |           |     |     |  M_2  MPerBlock
      |           |           |           |     |     |
      |-----------|-----------|-----------|-----|-----|-
      |           |           |           |     |     |
      |           |           |           |     |     |  M_3  MPerBlock
      |           |           |           |     |     |
      |-----------|-----------|-----------|-----|-----|-
  V   |           |           |           |     |     |
  -   |-----------|-----------|-----------|-----|-----|-  M_4  MPerBlock
      |           |           |           |     |     |
      |-----------|-----------|-----------|-----|-----|-

Example:
  assume:
    M0 = 5
    N0 = 4
    block_1d_id = 5
    M01 = 2

  idx_N0 = 1
  idx_M0 = 1
  M01_adapt = 2
  idx_M00 = 0
  idx_M01 = 1
  idx_N0_M01_local = 5
  output {1, 2}

◆ GridSize()

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
CK_TILE_HOST_DEVICE auto ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::GridSize ( index_t M,
index_t N ) -> index_t
inline static noexcept

Calculates GEMM kernel grid size.

Parameters
M - GEMM's M dimension.
N - GEMM's N dimension.
Returns
index_t A total number of workgroups.

Member Data Documentation

◆ KPerBlock

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
index_t ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::KPerBlock = BlockGemmShape::kK
static constexpr

◆ MPerBlock

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
index_t ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::MPerBlock = BlockGemmShape::kM
static constexpr

◆ NPerBlock

template<typename BlockGemmShapeType, index_t GroupNum, index_t M01>
index_t ck_tile::GemmSpatiallyLocalTilePartitioner< BlockGemmShapeType, GroupNum, M01 >::NPerBlock = BlockGemmShape::kN
static constexpr

The documentation for this struct was generated from the following file: