batched_transpose_kernel.hpp Source File
batched_transpose_kernel.hpp
Go to the documentation of this file.
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_HOST_DEVICE constexpr auto make_naive_tensor_view(DataType *__restrict__ p, const tuple< Lengths... > &lengths, const tuple< Strides... > &strides, number< GuaranteedLastDimensionVectorLength >=number<-1 >{}, number< GuaranteedLastDimensionVectorStride >=number<-1 >{})
Definition tensor_view.hpp:471
__device__ uint32_t amd_wave_read_first_lane(uint16_t v)
Definition tile/core/arch/amd_buffer_addressing.hpp:35
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
Definition tile/core/numeric/math.hpp:149
CK_TILE_HOST_DEVICE constexpr auto pad_tensor_view(const TensorView &tensor_view, const TileLengths &tile_lengths, DoPads)
Definition tensor_view.hpp:530
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
Definition batched_transpose_kernel.hpp:16
void * p_output
Definition batched_transpose_kernel.hpp:18
index_t dim_block_w
Definition batched_transpose_kernel.hpp:24
index_t dim_stride
Definition batched_transpose_kernel.hpp:22
const void * p_input
Definition batched_transpose_kernel.hpp:17
index_t dim_block_h
Definition batched_transpose_kernel.hpp:23
Definition batched_transpose_kernel.hpp:40
index_t width
Definition batched_transpose_kernel.hpp:45
index_t height
Definition batched_transpose_kernel.hpp:44
index_t dim_stride
Definition batched_transpose_kernel.hpp:46
index_t batch
Definition batched_transpose_kernel.hpp:43
const void * p_input
Definition batched_transpose_kernel.hpp:41
void * p_output
Definition batched_transpose_kernel.hpp:42
Definition batched_transpose_kernel.hpp:29
static CK_TILE_HOST constexpr auto GridSize(const Hargs &host_args)
Definition batched_transpose_kernel.hpp:52
remove_cvref_t< typename Pipeline::Problem > Problem
Definition batched_transpose_kernel.hpp:33
CK_TILE_DEVICE void operator()(Kargs kargs) const
Definition batched_transpose_kernel.hpp:76
static CK_TILE_HOST constexpr auto MakeKargs(const Hargs &h)
Definition batched_transpose_kernel.hpp:62
static CK_TILE_HOST constexpr auto BlockSize()
Definition batched_transpose_kernel.hpp:74
typename Problem::DataType Type
Definition batched_transpose_kernel.hpp:35
static CK_TILE_DEVICE index_t counter
Definition batched_transpose_kernel.hpp:31
BatchedTransposeHostArgs Hargs
Definition batched_transpose_kernel.hpp:50
remove_cvref_t< Pipeline_ > Pipeline
Definition batched_transpose_kernel.hpp:32
BatchedTransposeKargs Kargs
Definition batched_transpose_kernel.hpp:49
static constexpr index_t kBlockSize
Definition batched_transpose_kernel.hpp:37
Definition coordinate_transform.hpp:1392
Definition tile/core/container/sequence.hpp:49