1 | /******************************************************************************* |
2 | * Copyright 2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_JIT_IR_BLOCK_2D_UTILS_HPP |
18 | #define GPU_JIT_IR_BLOCK_2D_UTILS_HPP |
19 | |
20 | #include <algorithm> |
21 | |
22 | #include "gpu/jit/ir/hw_config.hpp" |
23 | |
24 | namespace dnnl { |
25 | namespace impl { |
26 | namespace gpu { |
27 | namespace jit { |
28 | |
29 | inline int block_2d_base_alignment(const hw_config_t &hw_cfg) { |
30 | ir_assert(hw_cfg.hw() >= ngen::HW::XeHPC); |
31 | // XXX: A steppings require 128 byte alignment due to a HW bug. |
32 | if (hw_cfg.stepping_id() <= 6) return 128; |
33 | return 64; |
34 | } |
35 | |
// Returns max(4, type_size) / type_size using integer division, i.e. the
// number of elements of size `type_size` (in bytes) that make up the larger
// of 4 bytes and one element.
// NOTE(review): presumably the required alignment of the x offset, expressed
// in elements - confirm against callers.
inline int block_2d_x_alignment(int type_size) {
    const int min_bytes = 4;
    const int unit_bytes = (type_size > min_bytes) ? type_size : min_bytes;
    return unit_bytes / type_size;
}
39 | |
// Checks whether `width` is acceptable for 2D block accesses.
//
// width     - width in elements.
// type_size - element size in bytes.
//
// Returns true when the width in bytes (width * type_size) is at least 64,
// at most 2^24 and a multiple of max(4, type_size); false otherwise.
inline bool block_2d_width_ok(int width, int type_size) {
    // Widen before multiplying: the product of two ints can overflow (which
    // is undefined behavior) and wrap into the accepted range for very large
    // widths.
    const long long width_bytes = static_cast<long long>(width) * type_size;
    if (width_bytes < 64) return false;
    if (width_bytes > (1 << 24)) return false;
    if (width_bytes % std::max(4, type_size) != 0) return false;
    return true;
}
47 | |
// Checks whether `height` is acceptable for 2D block accesses: returns true
// when it does not exceed 2^24. No lower bound is enforced here.
inline bool block_2d_height_ok(int height) {
    const int max_height = 1 << 24;
    return height <= max_height;
}
52 | |
53 | inline bool block_2d_pitch_ok(const hw_config_t &hw_cfg, int pitch, |
54 | int type_size, bool use_xy = true) { |
55 | int pitch_bytes = pitch * type_size; |
56 | if (pitch_bytes < 64) return false; |
57 | if (pitch_bytes > (1 << 24)) return false; |
58 | if (pitch_bytes % 16 != 0) return false; |
59 | // To be able to point the base to different rows. |
60 | if (use_xy && pitch_bytes % block_2d_base_alignment(hw_cfg) != 0) |
61 | return false; |
62 | return true; |
63 | } |
64 | |
// Returns the maximum block count for a 2D block access.
//
// Stores and transposed accesses always yield 1. Otherwise the result is
// 64 / (block_width * type_size) using integer division, where the product
// is the block width in bytes.
inline int block_2d_max_count(
        bool is_store, bool is_transpose, int block_width, int type_size) {
    const bool single_block_only = is_store || is_transpose;
    if (!single_block_only) {
        const int block_width_bytes = block_width * type_size;
        return 64 / block_width_bytes;
    }
    return 1;
}
70 | |
71 | } // namespace jit |
72 | } // namespace gpu |
73 | } // namespace impl |
74 | } // namespace dnnl |
75 | |
76 | #endif |
77 | |