1/*******************************************************************************
2* Copyright 2020-2021 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16#include <numeric>
17#include "common/broadcast_strategy.hpp"
18#include "cpu/x64/injectors/injector_utils.hpp"
19
20namespace dnnl {
21namespace impl {
22namespace cpu {
23namespace x64 {
24namespace injector_utils {
25
26static std::size_t get_vmm_size_bytes(const Xbyak::Xmm &vmm) {
27 static constexpr int byte_size_bits = 8;
28 return vmm.getBit() / byte_size_bits;
29}
30
31static std::size_t calc_vmm_to_preserve_size_bytes(
32 const std::initializer_list<Xbyak::Xmm> &vmm_to_preserve) {
33
34 return std::accumulate(vmm_to_preserve.begin(), vmm_to_preserve.end(),
35 std::size_t(0u), [](std::size_t accum, const Xbyak::Xmm &vmm) {
36 return accum + get_vmm_size_bytes(vmm);
37 });
38}
39
40register_preserve_guard_t::register_preserve_guard_t(jit_generator *host,
41 std::initializer_list<Xbyak::Reg64> reg64_to_preserve,
42 std::initializer_list<Xbyak::Xmm> vmm_to_preserve)
43 : host_(host)
44 , reg64_stack_(reg64_to_preserve)
45 , vmm_stack_(vmm_to_preserve)
46 , vmm_to_preserve_size_bytes_(
47 calc_vmm_to_preserve_size_bytes(vmm_to_preserve)) {
48
49 for (const auto &reg : reg64_to_preserve)
50 host_->push(reg);
51
52 if (!vmm_stack_.empty()) {
53 host_->sub(host_->rsp, vmm_to_preserve_size_bytes_);
54
55 auto stack_offset = vmm_to_preserve_size_bytes_;
56 for (const auto &vmm : vmm_to_preserve) {
57 stack_offset -= get_vmm_size_bytes(vmm);
58 const auto idx = vmm.getIdx();
59 if (vmm.isXMM())
60 host_->uni_vmovups(
61 host_->ptr[host_->rsp + stack_offset], Xbyak::Xmm(idx));
62 else if (vmm.isYMM())
63 host_->uni_vmovups(
64 host_->ptr[host_->rsp + stack_offset], Xbyak::Ymm(idx));
65 else
66 host_->uni_vmovups(
67 host_->ptr[host_->rsp + stack_offset], Xbyak::Zmm(idx));
68 }
69 }
70}
71
72register_preserve_guard_t::~register_preserve_guard_t() {
73
74 auto tmp_stack_offset = 0;
75
76 while (!vmm_stack_.empty()) {
77 const Xbyak::Xmm &vmm = vmm_stack_.top();
78 const auto idx = vmm.getIdx();
79 if (vmm.isXMM())
80 host_->uni_vmovups(
81 Xbyak::Xmm(idx), host_->ptr[host_->rsp + tmp_stack_offset]);
82 else if (vmm.isYMM())
83 host_->uni_vmovups(
84 Xbyak::Ymm(idx), host_->ptr[host_->rsp + tmp_stack_offset]);
85 else
86 host_->uni_vmovups(
87 Xbyak::Zmm(idx), host_->ptr[host_->rsp + tmp_stack_offset]);
88
89 tmp_stack_offset += get_vmm_size_bytes(vmm);
90 vmm_stack_.pop();
91 }
92
93 if (vmm_to_preserve_size_bytes_)
94 host_->add(host_->rsp, vmm_to_preserve_size_bytes_);
95
96 while (!reg64_stack_.empty()) {
97 host_->pop(reg64_stack_.top());
98 reg64_stack_.pop();
99 }
100}
101
102size_t register_preserve_guard_t::stack_space_occupied() const {
103 constexpr static size_t reg64_size = 8;
104 const size_t stack_space_occupied
105 = vmm_to_preserve_size_bytes_ + reg64_stack_.size() * reg64_size;
106
107 return stack_space_occupied;
108};
109
110conditional_register_preserve_guard_t::conditional_register_preserve_guard_t(
111 bool condition_to_be_met, jit_generator *host,
112 std::initializer_list<Xbyak::Reg64> reg64_to_preserve,
113 std::initializer_list<Xbyak::Xmm> vmm_to_preserve)
114 : register_preserve_guard_t {condition_to_be_met
115 ? register_preserve_guard_t {host, reg64_to_preserve,
116 vmm_to_preserve}
117 : register_preserve_guard_t {nullptr, {}, {}}} {};
118
119} // namespace injector_utils
120} // namespace x64
121} // namespace cpu
122} // namespace impl
123} // namespace dnnl
124