1/*******************************************************************************
2* Copyright 2020 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef CPU_X64_LRN_JIT_AVX512_COMMON_LRN_FWD_BLOCKED_HPP
18#define CPU_X64_LRN_JIT_AVX512_COMMON_LRN_FWD_BLOCKED_HPP
19
20#include "cpu/x64/lrn/jit_avx512_common_lrn_fwd_base.hpp"
21#include "cpu/x64/lrn/jit_avx512_common_lrn_utils.hpp"
22
23namespace dnnl {
24namespace impl {
25namespace cpu {
26namespace x64 {
27namespace lrn {
28
// JIT kernel class, templated on the data type `d_type`, that publicly derives
// from jit_avx512_common_lrn_kernel_fwd_t<d_type>. Only declarations live in
// this header; the constructor, compute_loop() and generate() are defined
// elsewhere.
// NOTE(review): judging by the class/file name this is the forward LRN kernel
// for the blocked (nChw16c) layout -- confirm against the implementation file.
29template <data_type_t d_type>
30class jit_avx512_common_lrn_kernel_fwd_blocked_t
31 : public jit_avx512_common_lrn_kernel_fwd_t<d_type> {
32
33public:
34 DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_common_lrn_kernel_fwd_blocked_t)
35
   // Constructor. `code_ptr` defaults to nullptr and `code_size` defaults to
   // twice Xbyak::DEFAULT_MAX_CODE_SIZE.
   // NOTE(review): `J` presumably carries the tensor geometry and
   // `alpha`/`beta`/`k`/`local_size` the LRN parameters; `use_h_parallel`
   // presumably selects parallelization over H -- confirm with the definition.
36 jit_avx512_common_lrn_kernel_fwd_blocked_t(const struct nChw16c_across_t &J,
37 prop_kind_t prop_kind, int use_h_parallel, float alpha, float beta,
38 float k, int local_size, void *code_ptr = nullptr,
39 size_t code_size = 2 * Xbyak::DEFAULT_MAX_CODE_SIZE);
40
   // Public helper taking a loop size; its body is not visible in this header.
   // NOTE(review): presumably emits the code for a loop of `loop_size_param`
   // iterations -- confirm with the definition.
41 void compute_loop(int loop_size_param);
42
43private:
   // Overrides the base-class generate(); defined outside this header.
44 void generate() override;
   // Shorthand for the base class's data_t, so it can be used unqualified here.
45 using data_t = typename jit_avx512_common_lrn_kernel_fwd_t<d_type>::data_t;
46
   // Data members. They have no in-class initializers, so they must be set by
   // the constructor. Members are initialized in declaration order; keep that
   // order in mind before rearranging them.
   // NOTE(review): names suggest vector-register sizes, scratch-buffer layout
   // values and spatial dimensions (H*W, W) -- confirm with the constructor.
47 int xmm_size_, zmm_size_, buffer_block_, buffer_nest_offset_,
48 src_prev_offset_, HW_, W_;
49 across_version version_;
   // Named aliases for the rsp and r9 general-purpose registers.
   // NOTE(review): `t_` presumably addresses a stack scratch area and `hw_`
   // presumably holds a spatial counter -- confirm in generate().
50 const Reg64 t_ = rsp;
51 const Reg64 hw_ = r9;
52
   // Compile-time constants 3 and 4.
   // NOTE(review): presumably register indices used for the previous/next
   // source data -- confirm where they are used.
53 static constexpr int xsrc_prev_ = 3;
54 static constexpr int xsrc_next_ = 4;
   // NOTE(review): presumably stores the constructor's `use_h_parallel`
   // argument -- confirm with the constructor definition.
55 int use_h_parallelism_;
56};
57
58} // namespace lrn
59} // namespace x64
60} // namespace cpu
61} // namespace impl
62} // namespace dnnl
63
64#endif
65