GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
The Grouped Convolution Forward kernel template. More...
#include <grouped_convolution_forward_kernel.hpp>
Classes | |
| struct | SpatialCoords |
Public Types | |
| using | TilePartitioner = remove_cvref_t<TilePartitioner_> |
| using | GemmPipeline = remove_cvref_t<GemmPipeline_> |
| using | EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
| using | GemmALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
| using | GemmBLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
| using | GemmCLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
| using | InLayout = remove_cvref_t<typename GroupedConvTraitsType_::InLayout> |
| using | WeiLayout = remove_cvref_t<typename GroupedConvTraitsType_::WeiLayout> |
| using | OutLayout = remove_cvref_t<typename GroupedConvTraitsType_::OutLayout> |
| using | DsLayout = remove_cvref_t<typename GroupedConvTraitsType_::DsLayout> |
| using | GemmDsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout> |
| using | InDataType = remove_cvref_t<typename GemmPipeline::ADataType> |
| using | WeiDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
| using | DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType> |
| using | OutDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
| using | CDElementwise = typename EpiloguePipeline::CDElementwise |
| using | GroupedConvFwdKernelArgsSpecialized |
Public Member Functions | |
| CK_TILE_DEVICE void | operator() (GroupedConvFwdKernelArgsSpecialized kargs) const |
Static Public Member Functions | |
| static CK_TILE_DEVICE SpatialCoords | UnflattenSpatial (index_t flat, index_t h_size, index_t w_size) |
| static CK_TILE_DEVICE index_t | FlattenSpatial (index_t d, index_t h, index_t w, index_t total_h, index_t total_w) |
| template<typename SplitImageInfo> | |
| static CK_TILE_DEVICE index_t | FindPieceId (index_t block_id, const SplitImageInfo &split_info, index_t num_pieces) |
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST auto | GridSize (const GroupedConvFwdKernelArgsSpecialized &kargs) |
| static CK_TILE_HOST auto | BlockSize () |
| static CK_TILE_HOST constexpr GroupedConvFwdKernelArgsSpecialized | MakeKernelArgs (const GroupedConvFwdHostArgs< CDElementwise > &hostArgs) |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSize () |
| static CK_TILE_HOST bool | IsSupportedArgument (const GroupedConvFwdKernelArgsSpecialized &kargs) |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set, typename ADescType, typename BDescType, typename CDescType> | |
| static CK_TILE_DEVICE auto | MakeGemmTensorViews (const InDataType *a_ptr, const WeiDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, OutDataType *c_ptr, const ADescType &a_desc, const BDescType &b_desc, const CDescType &c_desc) |
| template<typename TensorView> | |
| static CK_TILE_DEVICE auto | MakeGemmPadViews (const TensorView &views) |
| template<typename PadView> | |
| static CK_TILE_DEVICE auto | MakeGemmTileWindows (const PadView &views, const index_t i_m, const index_t i_n) |
| template<typename ADescType, typename BDescType, typename CDescType> | |
| static CK_TILE_DEVICE void | RunGemm (const InDataType *a_ptr, const WeiDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, OutDataType *c_ptr, void *smem_ptr_0, const ADescType &a_desc, const BDescType &b_desc, const CDescType &c_desc, const index_t gemm_k, const index_t block_idx_m, const index_t block_idx_n) |
| Runs a single GEMM problem cooperatively across the whole workgroup. | |
| template<typename ADescType, typename BDescType, typename CDescType> | |
| static CK_TILE_DEVICE void | RunGemm2LDS (const InDataType *a_ptr, const WeiDataType *b_ptr, const std::array< const void *, NumDTensor > &ds_ptr, OutDataType *c_ptr, void *__restrict__ smem_ptr_0, void *__restrict__ smem_ptr_1, const ADescType &a_desc, const BDescType &b_desc, const CDescType &c_desc, const index_t gemm_k, const index_t block_idx_m, const index_t block_idx_n) |
| Runs a single GEMM problem cooperatively across the whole workgroup, using two shared memory buffers. | |
Static Public Attributes | |
| static constexpr bool | EnableSplitImage = GroupedConvTraitsType_::EnableSplitImage |
| static constexpr index_t | NDimSpatial = GroupedConvTraitsType_::NDimSpatial |
| static constexpr ConvolutionSpecialization | ConvSpecialization |
| static constexpr index_t | NumDTensor = GroupedConvTraitsType_::NumDTensor |
| static constexpr index_t | kBlockSize = GemmPipeline::BlockSize |
| static constexpr bool | IsSplitKSupported = false |
| static constexpr auto | I0 = number<0>() |
| static constexpr auto | I1 = number<1>() |
| static constexpr auto | I2 = number<2>() |
| static constexpr auto | I3 = number<3>() |
Detailed Description
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
struct ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >
struct ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >
The Grouped Convolution Forward kernel template.
Overview
This class provides the grouped convolution forward kernel template. By semantically dividing the Implicit GEMM algorithm into the following parts, we achieve a flexible, versatile, and robust kernel implementation.
- Prolog - The start of the GEMM kernel implementation in operator() (the "function call operator"), which determines the work scope of each workgroup.
- GemmPipeline - The core part, the "heart" of the matrix multiplication algorithm. This is the place where each workgroup loads data from global memory and carries out dot products.
- Epilogue - The "final" part of the matrix multiplication implementation, responsible for storing results to global memory. This is also the place where any additional operator fusion may take place.
Additionally, both GemmPipeline and EpiloguePipeline are parameterized with a so-called Policy, which determines all internal details of those functional parts. You can think of it this way: both the GEMM and epilogue pipelines provide the control-flow logic, which is controlled by policies. Moreover, the policy is responsible for the definition of all necessary data layouts and each thread's work distribution.
- Template Parameters
-
GroupedConvTraitsType_ - The type of class providing traits for grouped convolution.
TilePartitioner_ - The type of class providing the mapping of a workgroup index onto the output data tile to be calculated. It determines the workgroup-to-data relationship (in other words, which data is processed and calculated by which workgroup).
GemmPipeline_ - The type of class which provides the core part of matrix multiplication. This class should provide the implementation of data loading from global memory and of block-wise matrix multiplication. You can think of it as the work done from a single workgroup's point of view.
EpiloguePipeline_ - The type of class providing the final part of the matrix multiplication implementation. It is responsible for storing the results calculated by GemmPipeline to the output C tensor in global memory.
Member Typedef Documentation
◆ CDElementwise
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::CDElementwise = typename EpiloguePipeline::CDElementwise |
◆ DsDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType> |
◆ DsLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::DsLayout = remove_cvref_t<typename GroupedConvTraitsType_::DsLayout> |
◆ EpiloguePipeline
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
◆ GemmALayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
◆ GemmBLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmBLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
◆ GemmCLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmCLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
◆ GemmDsLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmDsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout> |
◆ GemmPipeline
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmPipeline = remove_cvref_t<GemmPipeline_> |
◆ GroupedConvFwdKernelArgsSpecialized
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GroupedConvFwdKernelArgsSpecialized |
Initial value:
The Grouped Convolution kernel device arguments.
Definition grouped_convolution_forward_kernel.hpp:24
◆ InDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::InDataType = remove_cvref_t<typename GemmPipeline::ADataType> |
◆ InLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::InLayout = remove_cvref_t<typename GroupedConvTraitsType_::InLayout> |
◆ OutDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::OutDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
◆ OutLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::OutLayout = remove_cvref_t<typename GroupedConvTraitsType_::OutLayout> |
◆ TilePartitioner
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_> |
◆ WeiDataType
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::WeiDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
◆ WeiLayout
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::GroupedConvolutionForwardKernel< GroupedConvTraitsType_, TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::WeiLayout = remove_cvref_t<typename GroupedConvTraitsType_::WeiLayout> |
Member Function Documentation
◆ BlockSize()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
◆ FindPieceId()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<typename SplitImageInfo>
|
inlinestatic |
◆ FlattenSpatial()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
◆ GetName()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticnodiscard |
◆ GetSmemSize()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticconstexpr |
◆ GridSize()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
◆ IsSupportedArgument()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
◆ MakeGemmPadViews()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<typename TensorView>
|
inlinestatic |
◆ MakeGemmTensorViews()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<memory_operation_enum DstInMemOp = memory_operation_enum::set, typename ADescType, typename BDescType, typename CDescType>
|
inlinestatic |
◆ MakeGemmTileWindows()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<typename PadView>
|
inlinestatic |
◆ MakeKernelArgs()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestaticconstexpr |
◆ operator()()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline |
◆ RunGemm()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<typename ADescType, typename BDescType, typename CDescType>
|
inlinestatic |
Runs a single GEMM problem cooperatively across the whole workgroup.
- Parameters
-
a_ptr - Input A pointer.
b_ptr - Input B pointer.
ds_ptr - Input D tensors pointer array.
c_ptr - Output C pointer.
smem_ptr_0 - The starting pointer of the shared memory block.
a_desc - Input tensor A descriptor.
b_desc - Weight tensor B descriptor.
c_desc - Output tensor C descriptor.
gemm_k - The GEMM K dimension.
block_idx_m - The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n - The GEMM's output N dimension tile index processed by this workgroup.
◆ RunGemm2LDS()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<typename ADescType, typename BDescType, typename CDescType>
|
inlinestatic |
Runs a single GEMM problem cooperatively across the whole workgroup.
- Note
- RunGemm2LDS runs with two shared memory buffers, using the ping-pong buffer mechanism.
- Parameters
-
a_ptr - Input A pointer.
b_ptr - Input B pointer.
ds_ptr - Input D tensors pointer array.
c_ptr - Output C pointer.
smem_ptr_0 - The starting pointer of the 1st shared memory block.
smem_ptr_1 - The starting pointer of the 2nd shared memory block.
a_desc - Input tensor A descriptor.
b_desc - Weight tensor B descriptor.
c_desc - Output tensor C descriptor.
gemm_k - The GEMM K dimension.
block_idx_m - The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n - The GEMM's output N dimension tile index processed by this workgroup.
◆ UnflattenSpatial()
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inlinestatic |
Member Data Documentation
◆ ConvSpecialization
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
Initial value:
=
GroupedConvTraitsType_::ConvSpecialization
◆ EnableSplitImage
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ I0
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ I1
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ I2
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ I3
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ IsSplitKSupported
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ kBlockSize
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ NDimSpatial
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
◆ NumDTensor
template<typename GroupedConvTraitsType_, typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
staticconstexpr |
The documentation for this struct was generated from the following file: