1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_JIT_IR_BLOCK_2D_UTILS_HPP
18#define GPU_JIT_IR_BLOCK_2D_UTILS_HPP
19
20#include <algorithm>
21
22#include "gpu/jit/ir/hw_config.hpp"
23
24namespace dnnl {
25namespace impl {
26namespace gpu {
27namespace jit {
28
29inline int block_2d_base_alignment(const hw_config_t &hw_cfg) {
30 ir_assert(hw_cfg.hw() >= ngen::HW::XeHPC);
31 // XXX: A steppings require 128 byte alignment due to a HW bug.
32 if (hw_cfg.stepping_id() <= 6) return 128;
33 return 64;
34}
35
// Returns max(4, type_size) / type_size, i.e. how many elements of size
// type_size make up the 4-byte (or, for larger types, one-element) granule.
//
// type_size must be non-zero; the function divides by it.
inline int block_2d_x_alignment(int type_size) {
    // Element sizes below 4 are rounded up to a 4-unit granule.
    const int granule = (type_size < 4) ? 4 : type_size;
    return granule / type_size;
}
39
// Checks whether a surface width (in elements of size type_size) is
// acceptable for a 2D block message.
//
// The width in bytes (width * type_size) must:
//   - be at least 64,
//   - be at most 2^24,
//   - be a multiple of max(4, type_size).
//
// Returns true when all conditions hold, false otherwise.
inline bool block_2d_width_ok(int width, int type_size) {
    // Compute the byte size in 64 bits: an int product can overflow (which is
    // undefined behavior) and wrap into the accepted range for huge widths.
    const long long width_bytes = static_cast<long long>(width) * type_size;
    if (width_bytes < 64) return false;
    if (width_bytes > (1 << 24)) return false;
    if (width_bytes % std::max(4, type_size) != 0) return false;
    return true;
}
47
// Checks whether a surface height is acceptable for a 2D block message.
//
// Returns true when height does not exceed 2^24; no lower bound is checked.
inline bool block_2d_height_ok(int height) {
    const int max_height = 1 << 24;
    return height <= max_height;
}
52
53inline bool block_2d_pitch_ok(const hw_config_t &hw_cfg, int pitch,
54 int type_size, bool use_xy = true) {
55 int pitch_bytes = pitch * type_size;
56 if (pitch_bytes < 64) return false;
57 if (pitch_bytes > (1 << 24)) return false;
58 if (pitch_bytes % 16 != 0) return false;
59 // To be able to point the base to different rows.
60 if (use_xy && pitch_bytes % block_2d_base_alignment(hw_cfg) != 0)
61 return false;
62 return true;
63}
64
// Returns the maximum block count for a 2D block message.
//
// Stores and transposing messages are limited to a single block. Otherwise
// the result is 64 / (block_width * type_size), using integer division, so it
// is 0 when the product exceeds 64. The product must be non-zero in that case.
inline int block_2d_max_count(
        bool is_store, bool is_transpose, int block_width, int type_size) {
    const bool single_block_only = is_store || is_transpose;
    return single_block_only ? 1 : 64 / (block_width * type_size);
}
70
71} // namespace jit
72} // namespace gpu
73} // namespace impl
74} // namespace dnnl
75
76#endif
77