1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * This source code is licensed under the BSD-style license found in the |
5 | * LICENSE file in the root directory of this source tree. |
6 | */ |
7 | #pragma once |
8 | |
9 | #include "fbgemm/FbgemmBuild.h" |
10 | |
11 | #include <cstdint> |
12 | |
13 | namespace fbgemm { |
14 | |
15 | /** |
16 | * @brief Reference implementation of matrix transposition: B = A^T (declared here only; T is the element type of both buffers). |
17 | * @param M The height of the matrix (presumably the row count of the source A; confirm against the definition). |
18 | * @param N The width of the matrix (presumably the column count of the source A; confirm against the definition). |
19 | * @param src The memory buffer of the source matrix A; taken as const T*, so it is not modified through this pointer. |
20 | * @param ld_src The leading dimension of the source matrix A. |
21 | * @param dst The memory buffer of the destination matrix B, which receives the transposed result. |
22 | * @param ld_dst The leading dimension of the destination matrix B. |
23 | */ |
24 | template <typename T> |
25 | FBGEMM_API void transpose_ref( |
26 | int64_t M, |
27 | int64_t N, |
28 | const T* src, |
29 | int64_t ld_src, |
30 | T* dst, |
31 | int64_t ld_dst); |
32 | |
33 | namespace internal { |
34 | |
35 | /** |
36 | * @brief Transpose a matrix using Intel AVX2 (declared here only, without FBGEMM_API; T is the element type of both buffers). |
37 | * The parameter list (M, N, src, ld_src, dst, ld_dst) matches transpose_ref; presumably the meanings match too (confirm against the definition). |
38 | * This is called if the code is running on a CPU with Intel AVX2 support. |
39 | */ |
40 | template <typename T> |
41 | void transpose_avx2( |
42 | int64_t M, |
43 | int64_t N, |
44 | const T* src, |
45 | int64_t ld_src, |
46 | T* dst, |
47 | int64_t ld_dst); |
48 | |
49 | /** |
50 | * @brief Transpose a matrix using Intel AVX512 (declared here only, without FBGEMM_API; T is the element type of both buffers). |
51 | * The parameter list (M, N, src, ld_src, dst, ld_dst) matches transpose_ref; presumably the meanings match too (confirm against the definition). |
52 | * This is called if the code is running on a CPU with Intel AVX512 support. |
53 | */ |
54 | template <typename T> |
55 | void transpose_avx512( |
56 | int64_t M, |
57 | int64_t N, |
58 | const T* src, |
59 | int64_t ld_src, |
60 | T* dst, |
61 | int64_t ld_dst); |
62 | |
63 | } // namespace internal |
64 | |
65 | } // namespace fbgemm |
66 | |