1 | /******************************************************************************* |
2 | * Copyright 2020 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CPU_X64_LRN_JIT_AVX512_COMMON_LRN_FWD_BLOCKED_HPP |
18 | #define CPU_X64_LRN_JIT_AVX512_COMMON_LRN_FWD_BLOCKED_HPP |
19 | |
20 | #include "cpu/x64/lrn/jit_avx512_common_lrn_fwd_base.hpp" |
21 | #include "cpu/x64/lrn/jit_avx512_common_lrn_utils.hpp" |
22 | |
23 | namespace dnnl { |
24 | namespace impl { |
25 | namespace cpu { |
26 | namespace x64 { |
27 | namespace lrn { |
28 | |
29 | template <data_type_t d_type> |
30 | class jit_avx512_common_lrn_kernel_fwd_blocked_t |
31 | : public jit_avx512_common_lrn_kernel_fwd_t<d_type> { |
32 | |
33 | public: |
34 | DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_common_lrn_kernel_fwd_blocked_t) |
35 | |
36 | jit_avx512_common_lrn_kernel_fwd_blocked_t(const struct nChw16c_across_t &J, |
37 | prop_kind_t prop_kind, int use_h_parallel, float alpha, float beta, |
38 | float k, int local_size, void *code_ptr = nullptr, |
39 | size_t code_size = 2 * Xbyak::DEFAULT_MAX_CODE_SIZE); |
40 | |
41 | void compute_loop(int loop_size_param); |
42 | |
43 | private: |
44 | void generate() override; |
45 | using data_t = typename jit_avx512_common_lrn_kernel_fwd_t<d_type>::data_t; |
46 | |
47 | int xmm_size_, zmm_size_, buffer_block_, buffer_nest_offset_, |
48 | src_prev_offset_, HW_, W_; |
49 | across_version version_; |
50 | const Reg64 t_ = rsp; |
51 | const Reg64 hw_ = r9; |
52 | |
53 | static constexpr int xsrc_prev_ = 3; |
54 | static constexpr int xsrc_next_ = 4; |
55 | int use_h_parallelism_; |
56 | }; |
57 | |
58 | } // namespace lrn |
59 | } // namespace x64 |
60 | } // namespace cpu |
61 | } // namespace impl |
62 | } // namespace dnnl |
63 | |
64 | #endif |
65 | |