/*******************************************************************************
* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
16
17#ifndef CPU_X64_CPU_BARRIER_HPP
18#define CPU_X64_CPU_BARRIER_HPP
19
20#include <assert.h>
21
22#include "common/utils.hpp"
23#include "cpu/x64/jit_generator.hpp"
24
25namespace dnnl {
26namespace impl {
27namespace cpu {
28namespace x64 {
29
30namespace simple_barrier {
31
/* Alignment (in bytes) requested for simple_barrier::ctx_t.
 * Non-Windows builds use 4096, i.e. page alignment; Windows builds use 64,
 * i.e. cache line alignment.
 * NOTE(review): the reason for the smaller Windows value is not visible in
 * this header -- presumably a toolchain limit on over-aligned objects;
 * confirm before changing. */
#ifdef _WIN32
#define CTX_ALIGNMENT 64
#else
#define CTX_ALIGNMENT 4096
#endif
37
38STRUCT_ALIGN(
39 CTX_ALIGNMENT, struct ctx_t {
40 enum { CACHE_LINE_SIZE = 64 };
41 volatile size_t ctr;
42 char pad1[CACHE_LINE_SIZE - 1 * sizeof(size_t)];
43 volatile size_t sense;
44 char pad2[CACHE_LINE_SIZE - 1 * sizeof(size_t)];
45 });
46
47/* TODO: remove ctx_64_t once batch normalization switches to barrier-less
48 * implementation.
49 * Different alignments of context structure affect performance differently for
50 * convolution and batch normalization. Convolution performance becomes more
51 * stable with page alignment compared to cache line size alignment.
52 * Batch normalization (that creates C / simd_w barriers) degrades with page
53 * alignment due to significant overhead of ctx_init in case of mb=1. */
54STRUCT_ALIGN(
55 64, struct ctx_64_t {
56 enum { CACHE_LINE_SIZE = 64 };
57 volatile size_t ctr;
58 char pad1[CACHE_LINE_SIZE - 1 * sizeof(size_t)];
59 volatile size_t sense;
60 char pad2[CACHE_LINE_SIZE - 1 * sizeof(size_t)];
61 });
62
63template <typename ctx_t>
64inline void ctx_init(ctx_t *ctx) {
65 *ctx = utils::zero<ctx_t>();
66}
67void barrier(ctx_t *ctx, int nthr);
68
69/** injects actual barrier implementation into another jitted code
70 * @params:
71 * code -- jit_generator object where the barrier is to be injected
72 * reg_ctx -- read-only register with pointer to the barrier context
73 * reg_nnthr -- read-only register with the # of synchronizing threads
74 */
75void generate(jit_generator &code, Xbyak::Reg64 reg_ctx, Xbyak::Reg64 reg_nthr);
76
77} // namespace simple_barrier
78
79} // namespace x64
80} // namespace cpu
81} // namespace impl
82} // namespace dnnl
83
84#endif
85
// vim: et ts=4 sw=4 cindent cino+=l0,\:4,N-s
87