TransposeUtils.h source code [pytorch/third_party/fbgemm/src/TransposeUtils.h]

1	/*
2	* Copyright (c) Meta Platforms, Inc. and affiliates.
3	* All rights reserved.
4	* This source code is licensed under the BSD-style license found in the
5	* LICENSE file in the root directory of this source tree.
6	*/
7	#pragma once
8
9	#include "fbgemm/FbgemmBuild.h"
10
11	#include <cstdint>
12
13	namespace fbgemm {
14
15	/**
16	* @brief Reference implementation of matrix transposition: B = A^T.
	*
	* Only the declaration is visible in this header; the definition is not
	* part of this file.
	*
	* @tparam T Element type shared by the source and destination buffers.
17	* @param M The height of the matrix (presumably the row count of the source
	*          matrix A, which would make B an N x M matrix; confirm against
	*          the definition).
18	* @param N The width of the matrix (presumably the column count of the
	*          source matrix A; confirm against the definition).
19	* @param src The memory buffer of the source matrix A. Taken as a pointer
	*            to const, so it is not written through this parameter.
20	* @param ld_src The leading dimension of the source matrix A.
21	* @param dst The memory buffer of the destination matrix B.
22	* @param ld_dst The leading dimension of the destination matrix B.
23	*/
24	template <typename T>
25	FBGEMM_API void transpose_ref(
26	int64_t M,
27	int64_t N,
28	const T* src,
29	int64_t ld_src,
30	T* dst,
31	int64_t ld_dst);
32
33	namespace internal {
34
35	/**
36	* @brief Transpose a matrix using Intel AVX2.
37	*
38	* This is called if the code is running on a CPU with Intel AVX2 support.
	*
	* Only the declaration is visible in this header. The parameter list is
	* identical to that of transpose_ref, so the parameters presumably carry
	* the same meaning (source buffer src with leading dimension ld_src,
	* destination buffer dst with leading dimension ld_dst, and M / N giving
	* the matrix height / width); confirm against the definition.
	*
	* Unlike transpose_ref, this declaration is not marked FBGEMM_API and
	* lives in the nested namespace internal.
	*
	* @tparam T Element type shared by the source and destination buffers.
39	*/
40	template <typename T>
41	void transpose_avx2(
42	int64_t M,
43	int64_t N,
44	const T* src,
45	int64_t ld_src,
46	T* dst,
47	int64_t ld_dst);
48
49	/**
50	* @brief Transpose a matrix using Intel AVX512.
51	*
52	* This is called if the code is running on a CPU with Intel AVX512 support.
	*
	* Only the declaration is visible in this header. The parameter list is
	* identical to that of transpose_ref, so the parameters presumably carry
	* the same meaning (source buffer src with leading dimension ld_src,
	* destination buffer dst with leading dimension ld_dst, and M / N giving
	* the matrix height / width); confirm against the definition.
	*
	* Unlike transpose_ref, this declaration is not marked FBGEMM_API and
	* lives in the nested namespace internal.
	*
	* @tparam T Element type shared by the source and destination buffers.
53	*/
54	template <typename T>
55	void transpose_avx512(
56	int64_t M,
57	int64_t N,
58	const T* src,
59	int64_t ld_src,
60	T* dst,
61	int64_t ld_dst);
62
63	} // namespace internal
64
65	} // namespace fbgemm
66

Browse the source code of pytorch/third_party/fbgemm/src/TransposeUtils.h