1 | /******************************************************************************* |
2 | * Copyright 2017-2020 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CPU_X64_CPU_BARRIER_HPP |
18 | #define CPU_X64_CPU_BARRIER_HPP |
19 | |
20 | #include <assert.h> |
21 | |
22 | #include "common/utils.hpp" |
23 | #include "cpu/x64/jit_generator.hpp" |
24 | |
25 | namespace dnnl { |
26 | namespace impl { |
27 | namespace cpu { |
28 | namespace x64 { |
29 | |
30 | namespace simple_barrier { |
31 | |
32 | #ifdef _WIN32 |
33 | #define CTX_ALIGNMENT 64 |
34 | #else |
35 | #define CTX_ALIGNMENT 4096 |
36 | #endif |
37 | |
/* Shared barrier state (presumably a sense-reversing centralized barrier:
 * `ctr` counts arrived threads, `sense` flips each round -- confirm against
 * barrier() in the .cpp). Each field is followed by padding so that `ctr`
 * and `sense` land on separate cache lines, avoiding false sharing between
 * the arrival counter and the release flag.
 * Alignment is CTX_ALIGNMENT: page-sized on non-Windows, cache-line-sized
 * on Windows (see the #ifdef above). */
STRUCT_ALIGN(
        CTX_ALIGNMENT, struct ctx_t {
            enum { CACHE_LINE_SIZE = 64 };
            volatile size_t ctr; // number of threads that have arrived
            char pad1[CACHE_LINE_SIZE - 1 * sizeof(size_t)]; // pad ctr to a full cache line
            volatile size_t sense; // release flag, toggled once per barrier round
            char pad2[CACHE_LINE_SIZE - 1 * sizeof(size_t)]; // pad sense to a full cache line
        });
46 | |
47 | /* TODO: remove ctx_64_t once batch normalization switches to barrier-less |
48 | * implementation. |
49 | * Different alignments of context structure affect performance differently for |
50 | * convolution and batch normalization. Convolution performance becomes more |
51 | * stable with page alignment compared to cache line size alignment. |
52 | * Batch normalization (that creates C / simd_w barriers) degrades with page |
53 | * alignment due to significant overhead of ctx_init in case of mb=1. */ |
/* Same layout and semantics as ctx_t, but always cache-line (64-byte)
 * aligned rather than CTX_ALIGNMENT-aligned -- kept for batch
 * normalization, where page alignment hurts (see the TODO above). */
STRUCT_ALIGN(
        64, struct ctx_64_t {
            enum { CACHE_LINE_SIZE = 64 };
            volatile size_t ctr; // number of threads that have arrived
            char pad1[CACHE_LINE_SIZE - 1 * sizeof(size_t)]; // pad ctr to a full cache line
            volatile size_t sense; // release flag, toggled once per barrier round
            char pad2[CACHE_LINE_SIZE - 1 * sizeof(size_t)]; // pad sense to a full cache line
        });
62 | |
/* Zero-initializes a barrier context (works for both ctx_t and ctx_64_t).
 * NOTE(review): callers are presumably expected to run this before the
 * first barrier() on the context, while no thread is using it -- confirm. */
template <typename ctx_t>
inline void ctx_init(ctx_t *ctx) {
    *ctx = utils::zero<ctx_t>();
}
/* Synchronizes `nthr` threads on the given context; defined out of line. */
void barrier(ctx_t *ctx, int nthr);
68 | |
/** Injects the actual barrier implementation into other jitted code.
 * @param code     jit_generator object where the barrier is to be injected
 * @param reg_ctx  read-only register holding a pointer to the barrier context
 * @param reg_nthr read-only register holding the number of synchronizing threads
 */
75 | void generate(jit_generator &code, Xbyak::Reg64 reg_ctx, Xbyak::Reg64 reg_nthr); |
76 | |
77 | } // namespace simple_barrier |
78 | |
79 | } // namespace x64 |
80 | } // namespace cpu |
81 | } // namespace impl |
82 | } // namespace dnnl |
83 | |
84 | #endif |
85 | |
86 | // vim: et ts=4 sw=4 cindent cino+=l0,\:4,N-s |
87 | |