/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
7#pragma once
8
9#include "fbgemm/FbgemmBuild.h"
10
11#include <cstdint>
12
13namespace fbgemm {
14
15/**
16 * @brief Reference implementation of matrix transposition: B = A^T.
17 * @param M The height of the matrix.
18 * @param N The width of the matrix.
19 * @param src The memory buffer of the source matrix A.
20 * @param ld_src The leading dimension of the source matrix A.
21 * @param dst The memory buffer of the destination matrix B.
22 * @param ld_dst The leading dimension of the destination matrix B.
23 */
24template <typename T>
25FBGEMM_API void transpose_ref(
26 int64_t M,
27 int64_t N,
28 const T* src,
29 int64_t ld_src,
30 T* dst,
31 int64_t ld_dst);
32
33namespace internal {
34
/**
 * @brief Transpose a matrix using Intel AVX2.
 *
 * This is called if the code is running on a CPU with Intel AVX2 support.
 * Only the declaration is given here; no definition is visible in this
 * header.
 *
 * The signature is identical to that of transpose_ref, so the parameters are
 * assumed to carry the same meaning -- NOTE(review): confirm against the
 * definition.
 *
 * @tparam T The element type shared by the source and destination buffers.
 * @param M The height of the matrix.
 * @param N The width of the matrix.
 * @param src The memory buffer of the source matrix (read-only).
 * @param ld_src The leading dimension of the source matrix.
 * @param dst The memory buffer of the destination matrix.
 * @param ld_dst The leading dimension of the destination matrix.
 */
template <typename T>
void transpose_avx2(
    int64_t M,
    int64_t N,
    const T* src,
    int64_t ld_src,
    T* dst,
    int64_t ld_dst);
48
/**
 * @brief Transpose a matrix using Intel AVX512.
 *
 * This is called if the code is running on a CPU with Intel AVX512 support.
 * Only the declaration is given here; no definition is visible in this
 * header.
 *
 * The signature is identical to that of transpose_ref, so the parameters are
 * assumed to carry the same meaning -- NOTE(review): confirm against the
 * definition.
 *
 * @tparam T The element type shared by the source and destination buffers.
 * @param M The height of the matrix.
 * @param N The width of the matrix.
 * @param src The memory buffer of the source matrix (read-only).
 * @param ld_src The leading dimension of the source matrix.
 * @param dst The memory buffer of the destination matrix.
 * @param ld_dst The leading dimension of the destination matrix.
 */
template <typename T>
void transpose_avx512(
    int64_t M,
    int64_t N,
    const T* src,
    int64_t ld_src,
    T* dst,
    int64_t ld_dst);
62
63} // namespace internal
64
65} // namespace fbgemm
66