1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * This source code is licensed under the BSD-style license found in the |
5 | * LICENSE file in the root directory of this source tree. |
6 | */ |
7 | #pragma once |
8 | #include <cstdint> |
9 | #include "./GenerateKernel.h" |
10 | #include "fbgemm/Fbgemm.h" |
11 | |
12 | namespace fbgemm { |
13 | |
14 | /** |
15 | * @brief Execute Engine for the macro-kernel and output processing. |
16 | * ExecuteKernel is a derived class of CodeGenBase. |
17 | */ |
18 | template < |
19 | typename packingAMatrix, |
20 | typename packingBMatrix, |
21 | typename cT, |
22 | typename processOutputType> |
23 | class ExecuteKernel : public CodeGenBase< |
24 | typename packingAMatrix::inpType, |
25 | typename packingBMatrix::inpType, |
26 | cT, |
27 | typename packingBMatrix::accType> { |
28 | public: |
29 | ExecuteKernel( |
30 | PackMatrix< |
31 | packingAMatrix, |
32 | typename packingAMatrix::inpType, |
33 | typename packingAMatrix::accType>& packA, |
34 | PackMatrix< |
35 | packingBMatrix, |
36 | typename packingBMatrix::inpType, |
37 | typename packingBMatrix::accType>& packB, |
38 | cT* matC, |
39 | typename packingBMatrix::accType* C_buffer, |
40 | int32_t ldc, |
41 | const processOutputType& outputProcess, |
42 | thread_type_t th_info, |
43 | const BlockingFactors* params = nullptr); |
44 | void execute(int kBlock); |
45 | |
46 | private: |
47 | PackMatrix< |
48 | packingAMatrix, |
49 | typename packingAMatrix::inpType, |
50 | typename packingAMatrix::accType>& |
51 | packedA_; ///< Packed block of matrix A. |
52 | PackMatrix< |
53 | packingBMatrix, |
54 | typename packingBMatrix::inpType, |
55 | typename packingBMatrix::accType>& packedB_; ///< Packed matrix B. |
56 | cT* matC_; ///< Output for matrix C. |
57 | typename packingAMatrix::accType* |
58 | C_buffer_; ///< the accumulation buffer for matrix C. |
59 | int32_t ldc_; ///< the leading dimension of matrix C. |
60 | const processOutputType& outputProcess_; ///< output processing function for |
61 | ///< the C tile in the macro-kernel. |
62 | }; |
63 | |
64 | } // namespace fbgemm |
65 | |