1 | /******************************************************************************* |
2 | * Copyright 2017-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <assert.h> |
18 | |
19 | #include "cpu/x64/cpu_barrier.hpp" |
20 | |
21 | namespace dnnl { |
22 | namespace impl { |
23 | namespace cpu { |
24 | namespace x64 { |
25 | |
26 | namespace simple_barrier { |
27 | |
28 | void generate( |
29 | jit_generator &code, Xbyak::Reg64 reg_ctx, Xbyak::Reg64 reg_nthr) { |
30 | #define BAR_CTR_OFF offsetof(ctx_t, ctr) |
31 | #define BAR_SENSE_OFF offsetof(ctx_t, sense) |
32 | using namespace Xbyak; |
33 | |
34 | Xbyak::Reg64 reg_tmp = [&]() { |
35 | /* returns register which is neither reg_ctx nor reg_nthr */ |
36 | Xbyak::Reg64 regs[] = {util::rax, util::rbx, util::rcx}; |
37 | for (size_t i = 0; i < sizeof(regs) / sizeof(regs[0]); ++i) |
38 | if (!utils::one_of(regs[i], reg_ctx, reg_nthr)) return regs[i]; |
39 | return regs[0]; /* should not happen */ |
40 | }(); |
41 | |
42 | Label barrier_exit_label, barrier_exit_restore_label, spin_label; |
43 | |
44 | code.cmp(reg_nthr, 1); |
45 | code.jbe(barrier_exit_label); |
46 | |
47 | code.push(reg_tmp); |
48 | |
49 | /* take and save current sense */ |
50 | code.mov(reg_tmp, code.ptr[reg_ctx + BAR_SENSE_OFF]); |
51 | code.push(reg_tmp); |
52 | code.mov(reg_tmp, 1); |
53 | |
54 | code.lock(); |
55 | code.xadd(code.ptr[reg_ctx + BAR_CTR_OFF], reg_tmp); |
56 | code.add(reg_tmp, 1); |
57 | code.cmp(reg_tmp, reg_nthr); |
58 | code.pop(reg_tmp); /* restore previous sense */ |
59 | code.jne(spin_label); |
60 | |
61 | /* the last thread {{{ */ |
62 | code.mov(code.qword[reg_ctx + BAR_CTR_OFF], 0); // reset ctx |
63 | |
64 | // notify waiting threads |
65 | code.not_(reg_tmp); |
66 | code.mov(code.ptr[reg_ctx + BAR_SENSE_OFF], reg_tmp); |
67 | code.jmp(barrier_exit_restore_label); |
68 | /* }}} the last thread */ |
69 | |
70 | code.CodeGenerator::L(spin_label); |
71 | code.pause(); |
72 | code.cmp(reg_tmp, code.ptr[reg_ctx + BAR_SENSE_OFF]); |
73 | code.je(spin_label); |
74 | |
75 | code.CodeGenerator::L(barrier_exit_restore_label); |
76 | code.pop(reg_tmp); |
77 | |
78 | code.CodeGenerator::L(barrier_exit_label); |
79 | #undef BAR_CTR_OFF |
80 | #undef BAR_SENSE_OFF |
81 | } |
82 | |
83 | /** jit barrier generator */ |
84 | struct jit_t : public jit_generator { |
85 | |
86 | void generate() override { |
87 | simple_barrier::generate(*this, abi_param1, abi_param2); |
88 | ret(); |
89 | } |
90 | |
91 | // TODO: Need to check status |
92 | jit_t() : jit_generator(jit_name()) { create_kernel(); } |
93 | |
94 | DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_t) |
95 | }; |
96 | |
97 | void barrier(ctx_t *ctx, int nthr) { |
98 | static jit_t j; /* XXX: constructed on load ... */ |
99 | j(ctx, nthr); |
100 | } |
101 | |
102 | } // namespace simple_barrier |
103 | |
104 | } // namespace x64 |
105 | } // namespace cpu |
106 | } // namespace impl |
107 | } // namespace dnnl |
108 | |
109 | // vim: et ts=4 sw=4 cindent cino+=l0,\:4,N-s |
110 | |