1 | /******************************************************************************* |
2 | * Copyright 2020-2021 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | #include <numeric> |
17 | #include "common/broadcast_strategy.hpp" |
18 | #include "cpu/x64/injectors/injector_utils.hpp" |
19 | |
20 | namespace dnnl { |
21 | namespace impl { |
22 | namespace cpu { |
23 | namespace x64 { |
24 | namespace injector_utils { |
25 | |
26 | static std::size_t get_vmm_size_bytes(const Xbyak::Xmm &vmm) { |
27 | static constexpr int byte_size_bits = 8; |
28 | return vmm.getBit() / byte_size_bits; |
29 | } |
30 | |
31 | static std::size_t calc_vmm_to_preserve_size_bytes( |
32 | const std::initializer_list<Xbyak::Xmm> &vmm_to_preserve) { |
33 | |
34 | return std::accumulate(vmm_to_preserve.begin(), vmm_to_preserve.end(), |
35 | std::size_t(0u), [](std::size_t accum, const Xbyak::Xmm &vmm) { |
36 | return accum + get_vmm_size_bytes(vmm); |
37 | }); |
38 | } |
39 | |
40 | register_preserve_guard_t::register_preserve_guard_t(jit_generator *host, |
41 | std::initializer_list<Xbyak::Reg64> reg64_to_preserve, |
42 | std::initializer_list<Xbyak::Xmm> vmm_to_preserve) |
43 | : host_(host) |
44 | , reg64_stack_(reg64_to_preserve) |
45 | , vmm_stack_(vmm_to_preserve) |
46 | , vmm_to_preserve_size_bytes_( |
47 | calc_vmm_to_preserve_size_bytes(vmm_to_preserve)) { |
48 | |
49 | for (const auto ® : reg64_to_preserve) |
50 | host_->push(reg); |
51 | |
52 | if (!vmm_stack_.empty()) { |
53 | host_->sub(host_->rsp, vmm_to_preserve_size_bytes_); |
54 | |
55 | auto stack_offset = vmm_to_preserve_size_bytes_; |
56 | for (const auto &vmm : vmm_to_preserve) { |
57 | stack_offset -= get_vmm_size_bytes(vmm); |
58 | const auto idx = vmm.getIdx(); |
59 | if (vmm.isXMM()) |
60 | host_->uni_vmovups( |
61 | host_->ptr[host_->rsp + stack_offset], Xbyak::Xmm(idx)); |
62 | else if (vmm.isYMM()) |
63 | host_->uni_vmovups( |
64 | host_->ptr[host_->rsp + stack_offset], Xbyak::Ymm(idx)); |
65 | else |
66 | host_->uni_vmovups( |
67 | host_->ptr[host_->rsp + stack_offset], Xbyak::Zmm(idx)); |
68 | } |
69 | } |
70 | } |
71 | |
72 | register_preserve_guard_t::~register_preserve_guard_t() { |
73 | |
74 | auto tmp_stack_offset = 0; |
75 | |
76 | while (!vmm_stack_.empty()) { |
77 | const Xbyak::Xmm &vmm = vmm_stack_.top(); |
78 | const auto idx = vmm.getIdx(); |
79 | if (vmm.isXMM()) |
80 | host_->uni_vmovups( |
81 | Xbyak::Xmm(idx), host_->ptr[host_->rsp + tmp_stack_offset]); |
82 | else if (vmm.isYMM()) |
83 | host_->uni_vmovups( |
84 | Xbyak::Ymm(idx), host_->ptr[host_->rsp + tmp_stack_offset]); |
85 | else |
86 | host_->uni_vmovups( |
87 | Xbyak::Zmm(idx), host_->ptr[host_->rsp + tmp_stack_offset]); |
88 | |
89 | tmp_stack_offset += get_vmm_size_bytes(vmm); |
90 | vmm_stack_.pop(); |
91 | } |
92 | |
93 | if (vmm_to_preserve_size_bytes_) |
94 | host_->add(host_->rsp, vmm_to_preserve_size_bytes_); |
95 | |
96 | while (!reg64_stack_.empty()) { |
97 | host_->pop(reg64_stack_.top()); |
98 | reg64_stack_.pop(); |
99 | } |
100 | } |
101 | |
102 | size_t register_preserve_guard_t::stack_space_occupied() const { |
103 | constexpr static size_t reg64_size = 8; |
104 | const size_t stack_space_occupied |
105 | = vmm_to_preserve_size_bytes_ + reg64_stack_.size() * reg64_size; |
106 | |
107 | return stack_space_occupied; |
108 | }; |
109 | |
110 | conditional_register_preserve_guard_t::conditional_register_preserve_guard_t( |
111 | bool condition_to_be_met, jit_generator *host, |
112 | std::initializer_list<Xbyak::Reg64> reg64_to_preserve, |
113 | std::initializer_list<Xbyak::Xmm> vmm_to_preserve) |
114 | : register_preserve_guard_t {condition_to_be_met |
115 | ? register_preserve_guard_t {host, reg64_to_preserve, |
116 | vmm_to_preserve} |
117 | : register_preserve_guard_t {nullptr, {}, {}}} {}; |
118 | |
119 | } // namespace injector_utils |
120 | } // namespace x64 |
121 | } // namespace cpu |
122 | } // namespace impl |
123 | } // namespace dnnl |
124 | |