1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include "gpu/jit/conv/config_lookup_table.hpp"
18
19#include <string>
20#include <vector>
21#include <unordered_map>
22
23#include "gpu/jit/conv/config.hpp"
24#include "gpu/jit/utils/utils.hpp"
25
26namespace dnnl {
27namespace impl {
28namespace gpu {
29namespace jit {
30
31ngen::HW to_hw(const std::string &s) {
32 using namespace ir_utils;
33#define CASE(name) \
34 if (s == #name || s == to_lower(#name)) return ngen::HW::name;
35 CASE(XeHP)
36 CASE(XeHPG)
37 CASE(XeHPC)
38#undef CASE
39 ir_error_not_expected();
40 return ngen::HW::Unknown;
41}
42
43int_filter_t::int_filter_t(const std::string &s) {
44 cmp_op_ = op_kind_t::_eq;
45 if (s.empty()) {
46 value_ = 0;
47 return;
48 }
49 auto end = s.size();
50 auto last = s[end - 1];
51 if (last == '+') {
52 cmp_op_ = op_kind_t::_ge;
53 end--;
54 }
55 value_ = std::stoi(s.substr(0, end));
56}
57
58bool int_filter_t::matches(int value) const {
59 switch (cmp_op_) {
60 case op_kind_t::_eq: return value == value_;
61 case op_kind_t::_le: return value <= value_;
62 case op_kind_t::_ge: return value >= value_;
63 case op_kind_t::_lt: return value < value_;
64 case op_kind_t::_gt: return value > value_;
65 default: ir_error_not_expected();
66 }
67 return false;
68}
69
70type_filter_t::type_filter_t(const std::string &s) {
71 for (size_t pos = 0;;) {
72 bool found = false;
73 for (auto &p : all_patterns()) {
74 if (try_parse(s, pos, p)) {
75 found = true;
76 break;
77 }
78 }
79 if (!found) {
80 ir_assert(pos == s.size()) << s;
81 break;
82 }
83 }
84}
85
86bool type_filter_t::matches(const std::vector<data_type_t> &values) const {
87 ir_assert(values.size() == patterns_.size());
88 for (size_t i = 0; i < values.size(); i++) {
89 auto &ptrn = patterns_[i];
90 if (ptrn == "*") continue;
91 if (ptrn == "x8") {
92 if (!utils::one_of(values[i], data_type::s8, data_type::u8))
93 return false;
94 } else if (ptrn == "f32") {
95 if (values[i] != data_type::f32) return false;
96 } else if (ptrn == "bf16") {
97 if (values[i] != data_type::bf16) return false;
98 } else if (ptrn == "f16") {
99 if (values[i] != data_type::f16) return false;
100 } else {
101 ir_error_not_expected() << ptrn;
102 }
103 }
104 return true;
105}
106
107bool type_filter_t::try_parse(
108 const std::string &s, size_t &pos, const std::string &pattern) {
109 if (pos + pattern.size() > s.size()) return false;
110 if (!std::equal(pattern.begin(), pattern.end(), s.begin() + pos))
111 return false;
112 patterns_.push_back(s.substr(pos, pattern.size()));
113 pos = pos + pattern.size();
114 return true;
115}
116
117std::vector<std::string> &type_filter_t::all_patterns() {
118 static std::vector<std::string> ret = {
119 "x8",
120 "bf16",
121 "f16",
122 "f32",
123 "*",
124 };
125 return ret;
126}
127conv_problem_filter_t::conv_problem_filter_t(const std::string &s) {
128 auto parts = ir_utils::split(s, " ");
129 for (auto &part : parts) {
130 auto sub_parts = ir_utils::split(part, "=");
131 ir_assert(sub_parts.size() == 2) << part;
132 auto &name = sub_parts[0];
133 auto &value = sub_parts[1];
134 if (name == "hw") {
135 hw_ = to_hw(value);
136 } else if (name == "cfg") {
137 type_filter_ = type_filter_t(value);
138 } else if (name == "dir") {
139 dir_ = value;
140 } else if (name == "desc") {
141 desc_ = value;
142 } else if (name == "mb") {
143 mb_filter_ = int_filter_t(value);
144 } else if (name == "post_ops") {
145 post_ops_ = value;
146 } else {
147 ir_error_not_expected() << part;
148 }
149 }
150}
151
152bool conv_problem_filter_t::matches(
153 const conv_problem_t &prb, const hw_config_t &hw_cfg) const {
154 if (hw_cfg.hw() != hw_) return false;
155 if (!matches_dir(prb)) return false;
156 if (!type_filter_.matches(
157 {prb.src_data_type, prb.wei_data_type, prb.dst_data_type}))
158 return false;
159 if (!fpmath_filter_.matches(prb.fpmath_mode)) return false;
160 if (!mb_filter_.matches(prb.mb)) return false;
161 if (!matches_desc(prb)) return false;
162 if (!matches_post_ops(prb)) return false;
163 return true;
164}
165
166bool conv_problem_filter_t::matches_dir(const conv_problem_t &prb) const {
167 if (dir_.empty()) return true;
168 if (dir_ == "fwd") {
169 return prb.is_fwd;
170 } else if (dir_ == "bwd_d") {
171 return prb.is_bwd_d;
172 } else if (dir_ == "bwd_w") {
173 return prb.is_bwd_w;
174 } else {
175 ir_error_not_expected() << dir_;
176 }
177 return false;
178}
179
180bool conv_problem_filter_t::matches_desc(const conv_problem_t &prb) const {
181 return prb.desc_str(/*print_mb=*/false) == desc_;
182}
183
184bool conv_problem_filter_t::matches_post_ops(const conv_problem_t &prb) const {
185 if (post_ops_ == "*") return true;
186 if (post_ops_ == "sum") return prb.with_sum;
187 ir_assert(post_ops_.empty()) << post_ops_;
188 return !prb.with_sum;
189}
190
191conv_config_lookup_table_t::conv_config_lookup_table_t() {
192 // clang-format off
193 // wdsr
194 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih240iw135oc32oh240ow135kh3kw3ph1pw1", "fsp=1");
195 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic32ih240iw135oc128oh240ow135kh3kw3ph1pw1", "fsp=1");
196 // kuaishou noisy
197 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih56oc128oh56kh3ph1 post_ops=*", "fsp=1");
198 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih56oc256oh28kh1sh2ph0", "fsp=1");
199 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih56oc256oh28kh3sh2ph1", "fsp=1");
200 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic256ih28oc512oh14kh3sh2ph1", "fsp=1");
201 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic3ih448oc64oh224kh7sh2ph3", "fsp=1");
202 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic512ih14oc512oh14kh3ph1 post_ops=*", "fsp=1");
203 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic64ih112oc128oh56kh1sh2ph0", "fsp=1");
204 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic64ih112oc128oh56kh3sh2ph1", "fsp=1");
205 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic64ih112oc64oh112kh3ph1", "fsp=1");
206 // kuaishou block
207 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic512ih28iw21oc512oh28ow21kh3kw3ph1pw1 post_ops=*", "fsp=1");
208 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic256ih56iw42oc256oh56ow42kh3kw3ph1pw1 post_ops=*", "fsp=1");
209 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic3ih896iw672oc64oh448ow336kh7kw7sh2sw2ph3pw3", "fsp=1");
210 // kuaishou blur
211 //
212 add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic768iw900oc160ow900kw1pw0", "fsp=1");
213 add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic768iw900oc192ow900kw1pw0", "fsp=1");
214 add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic768ih30oc192oh30kh1ph0", "fsp=1");
215 add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic288ih61oc384oh30kh3sh2ph0", "fsp=1");
216 add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic192iw3721oc64ow3721kw1pw0", "fsp=1");
217 add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic192iw3721oc48ow3721kw1pw0", "fsp=1");
218 add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic32ih255oc32oh253kh3ph0", "fsp=1");
219 //
220 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic768iw900oc160ow900kw1pw0", "fsp=1");
221 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic768iw900oc192ow900kw1pw0", "fsp=1");
222 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic768ih30oc192oh30kh1ph0", "fsp=1");
223 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic288ih61oc384oh30kh3sh2ph0", "fsp=1");
224 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic192iw3721oc64ow3721kw1pw0", "fsp=1");
225 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic192iw3721oc48ow3721kw1pw0", "fsp=1");
226 add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic32ih255oc32oh253kh3ph0", "fsp=1");
227
228 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic2048iw49oc512ow49kw1pw0", "T=oc8mb1osp4 s=x3.g2.v4 c=0");
229 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic128iw784oc512ow784kw1pw0 post_ops=sum", "T=oc8mb1osp4 s=x2.g1.v1 c=0");
230 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1", "T=oc8mb1osp4 s=x3.g2.v4 c=0");
231 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0", "T=oc2mb1osp4 s=x3.g2.v2 c=0");
232 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0", "T=oc8mb1osp4 s=x3.g2.v4 c=0");
233 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "T=oc8mb4osp1 s=x3.g2.v4 c=0");
234 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256iw196oc1024ow196kw1pw0 post_ops=sum", "T=oc8mb1osp4 s=x3.g2.v2 c=0");
235 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc64ow3136kw1pw0", "T=oc4mb1osp8 s=x1.g1.v0 c=0");
236 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256ih14oc256oh14kh3ph1", "T=oc8mb4osp1 s=x3.g1.v4 c=0");
237 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512iw49oc2048ow49kw1pw0 post_ops=sum", "T=oc8mb4osp1 s=x3.g1.v2 c=0");
238 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64ih56oc64oh56kh3ph1", "T=oc2mb1osp8 s=x3.g1.v4 c=0");
239 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic128ih28oc128oh28kh3ph1", "T=oc4mb1osp8 s=x3.g1.v4 c=0");
240 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc256ow196kw1pw0", "T=oc8mb1osp4 s=x3.g1.v3 c=0");
241 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1", "T=oc8mb4osp1 s=x3.g1.v4 c=0");
242 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc64ow3136kw1pw0", "T=oc4mb1osp8 s=x1.g1.v0 c=0");
243 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512ih7oc512oh7kh3ph1", "T=oc4mb4osp1 s=x3.g1.v4 c=0");
244 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1", "T=oc4mb1osp8 s=x3.g1.v4 c=0");
245 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0", "T=oc8mb1osp4 s=x3.g2.v2 c=0");
246 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3", "T=oc2mb1ow8 s=x3.g1.v2 c=0");
247 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc128ow784kw1pw0", "T=oc4mb1osp2 s=x3.g1.v4 c=0");
248 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0 post_ops=sum", "T=oc4mb1osp2 s=x3.g2.v4 c=0");
249 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc512ow196kw1pw0", "T=oc8mb1osp4 s=x3.g2.v4 c=0");
250 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc128ow3136kw1pw0", "T=oc4mb1osp2 s=x3.g1.v4 c=0");
251 add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc256ow784kw1pw0", "T=oc8mb1osp4 s=x3.g2.v3 c=0");
252 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "simd=32 p=x0 T=ic4oc8mb1 l=oc16 i=mb16ic32oc16");
253 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic1024iw196oc256ow196kw1pw0", "simd=32 p=x0 T=ic2oc2iw2 l=oc8 i=mb16ic32oc16");
254 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic1024iw196oc512ow196kw1pw0", "simd=32 p=x0 T=oc4iw2 l=oc8 i=mb16ic32oc16");
255 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic128ih28oc128oh28kh3ph1", "simd=16 p=x0 T= i=mb16ic16oc16");
256 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic128ih56oc128oh28kh3sh2ph1", "simd=16 p=x0 T=ic2mb4 i=mb16ic16oc16");
257 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic128iw784oc512ow784kw1pw0", "simd=32 p=x0 T=oc4 l=oc8 i=mb8ic32oc16");
258 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic2048iw49oc512ow49kw1pw0", "simd=32 p=x0 T=ic2oc4 l=oc8 i=mb16ic32oc16");
259 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256ih14oc256oh14kh3ph1", "simd=32 p=x0 T=oc2iw2 l=oc8kw3kh3 i=mb8ic32oc16");
260 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256ih28oc256oh14kh3sh2ph1", "simd=32 p=x0 T=ic2oc2iw2 l=oc8kw3kh3 i=mb16ic32oc16");
261 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256ih56oc512oh28kh1sh2ph0", "simd=32 p=x0 T=oc2iw2 l=oc16 i=mb16ic32oc16");
262 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256iw196oc1024ow196kw1pw0", "simd=32 p=x0 T=oc2 l=oc32 i=mb8ic32oc16");
263 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256iw3136oc128ow3136kw1pw0", "simd=16 p=x0 T=ic2 i=mb16ic16oc16");
264 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256iw3136oc64ow3136kw1pw0", "simd=32 p=x0 T= i=mb16ic32oc16");
265 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512ih14oc512oh7kh3sh2ph1", "simd=32 p=x0 T=ic2oc4iw2 l=oc8kw3kh3 i=mb16ic32oc16");
266 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512ih28oc1024oh14kh1sh2ph0", "simd=32 p=x0 T=oc4iw8 l=oc16 i=mb16ic32oc16");
267 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512ih7oc512oh7kh3ph1", "simd=32 p=x0 T=oc2iw8 l=oc16kw3kh3 i=mb8ic32oc16");
268 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512iw49oc2048ow49kw1pw0", "simd=16 p=x1 T=ic2mb2 i=mb8ic16oc16");
269 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512iw784oc128ow784kw1pw0", "simd=16 p=x0 T=ic2 i=mb16ic16oc16");
270 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512iw784oc256ow784kw1pw0", "simd=32 p=x0 T=oc2 l=oc8 i=mb16ic32oc16");
271 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic64ih56oc64oh56kh3ph1", "simd=32 p=x0 T=iw2 i=mb8ic32oc16");
272 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic64iw3136oc256ow3136kw1pw0", "simd=32 p=x0 T=oc2 l=oc8 i=mb8ic32oc16");
273 add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic64iw3136oc64ow3136kw1pw0", "simd=16 p=x0 T=ic2 i=mb16ic16oc16");
274 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16");
275 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic1024iw196oc256ow196kw1pw0", "simd=16 p=x1 T=ic4oc2 i=ic16oc16mb16");
276 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic1024iw196oc512ow196kw1pw0", "simd=16 p=x1 T=ic4oc4 i=ic16oc16mb16");
277 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic128ih28oc128oh28kh3ph1", "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16");
278 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic128ih56oc128oh28kh3sh2ph1", "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16");
279 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic128iw784oc512ow784kw1pw0", "simd=16 p=x1 T=ic2oc8 i=ic16oc16mb16");
280 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic2048iw49oc512ow49kw1pw0", "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16");
281 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256ih14oc256oh14kh3ph1", "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16");
282 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256ih28oc256oh14kh3sh2ph1", "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16");
283 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256ih56oc512oh28kh1sh2ph0", "simd=16 p=x1 T=ic8oc4 i=ic16oc16mb16");
284 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256iw196oc1024ow196kw1pw0", "simd=16 p=x1 T=ic2oc2 i=ic16oc16mb16");
285 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256iw3136oc128ow3136kw1pw0", "simd=16 p=x0 T=ic4oc4 i=ic16oc16mb16");
286 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256iw3136oc64ow3136kw1pw0", "simd=16 p=x1 T=ic2oc4 i=ic16oc16mb16");
287 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic3ih224oc64oh112kh7sh2ph3", "simd=16 p=x0 l=ow28 T=oc2 i=kw8mb16oc16");
288 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512ih14oc512oh7kh3sh2ph1", "simd=16 p=x0 T=ic2oc2 i=ic16oc16mb16");
289 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512ih28oc1024oh14kh1sh2ph0", "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16");
290 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512ih7oc512oh7kh3ph1", "simd=16 p=x0 T=ic4 i=ic16oc16mb16");
291 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512iw49oc2048ow49kw1pw0", "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16");
292 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512iw784oc128ow784kw1pw0", "simd=16 p=x1 T=ic2oc4 i=ic16oc16mb16");
293 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512iw784oc256ow784kw1pw0", "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16");
294 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic64ih56oc64oh56kh3ph1", "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16");
295 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic64iw3136oc256ow3136kw1pw0", "simd=16 p=x1 T=ic2oc8 i=ic16oc16mb16");
296 add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic64iw3136oc64ow3136kw1pw0", "simd=16 p=x1 T=ic4oc2 i=ic16oc16mb16");
297 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "simd=32 p=x0 fsp=0 T=ic2oc4 l=ic32 i=mb16oc32ic16");
298 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic1024iw196oc256ow196kw1pw0", "simd=32 p=x0 fsp=0 T=ic2 l=ic32 i=mb8oc32ic16");
299 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic1024iw196oc512ow196kw1pw0", "simd=32 p=x0 fsp=0 T=ic4oc2ow2 l=ic16 i=mb16oc32ic16");
300 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic128ih28oc128oh28kh3ph1", "simd=16 p=x0 fsp=0 T=ow4 i=mb16oc16ic16");
301 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic128ih56oc128oh28kh3sh2ph1", "simd=16 p=x0 fsp=0 T= i=mb16oc16ic16");
302 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic128iw784oc512ow784kw1pw0", "simd=32 p=x0 fsp=0 T= i=mb8oc32ic16");
303 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic2048iw49oc512ow49kw1pw0", "simd=32 p=x0 fsp=0 T=ic2ow8 l=ic64 i=mb8oc32ic16");
304 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256ih14oc256oh14kh3ph1", "simd=32 p=x0 fsp=0 T=ic2mb2 l=ic8kw3kh3 i=mb8oc32ic16");
305 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256ih28oc256oh14kh3sh2ph1", "simd=32 p=x0 fsp=0 T=ic2ow2 l=ic8kw3kh3 i=mb8oc32ic16");
306 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256ih56oc512oh28kh1sh2ph0", "simd=32 p=x0 fsp=0 T=ic2 l=ic8 i=mb16oc32ic16");
307 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256iw196oc1024ow196kw1pw0", "simd=32 p=x0 fsp=0 T=ic2oc2ow2 l=ic8 i=mb16oc32ic16");
308 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256iw3136oc128ow3136kw1pw0", "simd=32 p=x0 fsp=0 T=ic2 l=ic8 i=mb8oc32ic16");
309 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256iw3136oc64ow3136kw1pw0", "simd=16 p=x1 fsp=0 T=ic2oc2 l=ic8 i=mb16oc16ic16");
310 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic3ih224oc64oh112kh7sh2ph3", "simd=32 p=x1 T= l=kh7 i=ow16oc32ic3kw7");
311 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512ih14oc512oh7kh3sh2ph1", "simd=32 p=x0 fsp=0 T=ic2ow8 l=ic16kw3kh3 i=mb8oc32ic16");
312 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512ih28oc1024oh14kh1sh2ph0", "simd=32 p=x0 fsp=0 T=ic4ow2 l=ic8 i=mb16oc32ic16");
313 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512ih7oc512oh7kh3ph1", "simd=32 p=x0 fsp=0 T=ic2ow8 l=ic16kw3kh3 i=mb8oc32ic16");
314 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512iw49oc2048ow49kw1pw0", "simd=32 p=x0 fsp=0 T=ic4oc2 l=ic8 i=mb16oc32ic16");
315 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512iw784oc128ow784kw1pw0", "simd=32 p=x0 fsp=0 T=ic4ow2 l=ic8 i=mb16oc32ic16");
316 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512iw784oc256ow784kw1pw0", "simd=32 p=x0 fsp=0 T=ic4 l=ic8 i=mb16oc32ic16");
317 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic64ih56oc64oh56kh3ph1", "simd=32 p=x0 fsp=0 T=ow2 i=mb8oc32ic16");
318 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic64iw3136oc256ow3136kw1pw0", "simd=32 p=x0 fsp=0 T= i=mb16oc32ic16");
319 add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic64iw3136oc64ow3136kw1pw0", "simd=16 p=x0 fsp=0 T=oc2 i=mb16oc16ic16");
320 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "p=x3 c=0 P=u T=ic4iw4 r=0");
321 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc256ow196kw1pw0", "p=x3 c=0 P=u T=ic4iw4 r=0");
322 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc512ow196kw1pw0", "p=x3 c=0 P=u T=ic4iw8 r=0");
323 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic128ih28oc128oh28kh3ph1", "p=x3 c=0 P=u T=ic2iw4 r=0");
324 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1", "p=x2 c=0 P=u T=ic2iw4 r=0");
325 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic128iw784oc512ow784kw1pw0", "p=x3 c=0 P=u T=ic2iw2 r=0");
326 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic2048iw49oc512ow49kw1pw0", "p=x3 c=0 P=u T=ic4iw8 r=0");
327 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256ih14oc256oh14kh3ph1", "p=x3 c=0 P=u T=ic4iw8 r=0");
328 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1", "p=x3 c=0 P=u T=ic4iw8 r=0");
329 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0", "p=x3 c=0 P=u T=ic4mb2 r=0");
330 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256iw196oc1024ow196kw1pw0", "p=x3 c=0 P=u T=ic4iw4 r=0");
331 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc128ow3136kw1pw0", "p=x2 c=0 P= T=ic4mb4 r=0");
332 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc64ow3136kw1pw0", "p=x0 c=0 P= T=ic2mb4 r=0");
333 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1", "p=x3 c=0 P=u T=ic4iw4 r=0");
334 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0", "p=x3 c=0 P=u T=ic4iw4 r=0");
335 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512ih7oc512oh7kh3ph1", "p=x3 c=0 P=u T=ic4iw8 r=0");
336 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512iw49oc2048ow49kw1pw0", "p=x3 c=0 P=u T=ic4iw8 r=0");
337 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc128ow784kw1pw0", "p=x3 c=0 P=u T=ic4iw2 r=0");
338 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc256ow784kw1pw0", "p=x3 c=0 P=u T=ic8iw2 r=0");
339 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic64ih56oc64oh56kh3ph1", "p=x2 c=0 P=u T=iw8 r=0");
340 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc256ow3136kw1pw0", "p=x3 c=0 P=u T=ic2mb4 r=0");
341 add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc64ow3136kw1pw0", "p=x0 c=0 P=u T=iw2 r=0");
342 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "p=x3 c=0 P=u T=ic8oc4 r=0");
343 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic1024iw196oc256ow196kw1pw0", "p=x3 c=0 P=u T=ic8oc4 r=0");
344 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic1024iw196oc512ow196kw1pw0", "p=x3 c=0 P=u T=ic8oc4 r=0");
345 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic128ih28oc128oh28kh3ph1", "p=x3 c=0 P=u T=ic4oc2 r=0");
346 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1", "p=x3 c=0 P=u T=ic4oc2 r=0");
347 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic128iw784oc512ow784kw1pw0", "p=x3 c=0 P=u T=ic4oc4 r=0");
348 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic2048iw49oc512ow49kw1pw0", "p=x3 c=0 P=u T=ic8oc4 r=0");
349 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256ih14oc256oh14kh3ph1", "p=x3 c=0 P=u T=ic8oc4 r=0");
350 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1", "p=x3 c=0 P=u T=ic8oc4 r=0");
351 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0", "p=x3 c=0 P=u T=ic8oc4 r=0");
352 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256iw196oc1024ow196kw1pw0", "p=x3 c=0 P=u T=ic8oc4 r=0");
353 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256iw3136oc128ow3136kw1pw0", "p=x3 c=0 P=u T=ic8oc4 r=0");
354 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256iw3136oc64ow3136kw1pw0", "p=x1 c=0 P=u T=ic8 r=0");
355 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3", "p=x1 c=0 P=u T= r=0");
356 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1", "p=x3 c=0 P=u T=ic8oc4 r=0");
357 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0", "p=x3 c=0 P=u T=ic8oc4 r=0");
358 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512ih7oc512oh7kh3ph1", "p=x3 c=0 P=u T=ic8oc4 r=0");
359 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512iw49oc2048ow49kw1pw0", "p=x3 c=0 P=u T=ic8oc4 r=0");
360 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512iw784oc128ow784kw1pw0", "p=x3 c=0 P=u T=ic8oc2 r=0");
361 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512iw784oc256ow784kw1pw0", "p=x3 c=0 P=u T=ic8oc4 r=0");
362 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic64ih56oc64oh56kh3ph1", "p=x1 c=0 P=u T=ic2 r=0");
363 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic64iw3136oc256ow3136kw1pw0", "p=x3 c=0 P= T=ic2oc2 r=0");
364 add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic64iw3136oc64ow3136kw1pw0", "p=x1 c=0 P= T=ic2 r=0");
365 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0");
366 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc256ow196kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc4ow4 r=0");
367 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc512ow196kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc2ow8 r=0");
368 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic128ih28oc128oh28kh3ph1", "p=x3 fsp=0 c=0 P=u T=oc2ow4 r=0");
369 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1", "p=x3 fsp=0 c=0 P=u T=oc2ow4 r=0");
370 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic128iw784oc512ow784kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc4ow2 r=0");
371 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic2048iw49oc512ow49kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0");
372 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256ih14oc256oh14kh3ph1", "p=x3 fsp=0 c=0 P=u T=oc4ow4 r=0");
373 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1", "p=x3 fsp=0 c=0 P=u T=oc4ow4 r=0");
374 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0", "p=x3 fsp=0 c=0 P=u T=oc8ow2 r=0");
375 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256iw196oc1024ow196kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0");
376 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc128ow3136kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc4mb2 r=0");
377 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc64ow3136kw1pw0", "p=x0 fsp=0 c=0 P=u T= r=0");
378 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3", "p=x2 c=0 P=u T=ow4 r=0");
379 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1", "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0");
380 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0", "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0");
381 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512ih7oc512oh7kh3ph1", "p=x3 fsp=0 c=0 P=u T=oc2ow8 r=0");
382 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512iw49oc2048ow49kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0");
383 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc128ow784kw1pw0", "p=x2 fsp=0 c=0 P=u T=oc2 r=0");
384 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc256ow784kw1pw0", "p=x3 fsp=0 c=0 P=u T=oc4ow2 r=0");
385 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic64ih56oc64oh56kh3ph1", "p=x3 fsp=0 c=0 P=u T=ow8 r=0");
386 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc256ow3136kw1pw0", "p=x2 fsp=0 c=0 P=u T=oc2mb4 r=0");
387 add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc64ow3136kw1pw0", "p=x0 fsp=0 c=0 P=u T=ow2 r=0");
388 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0", "p=x3 fsp=0 c=0 T=oc2mb8 r=0 stg=1");
389 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc256ow196kw1pw0", "p=x3 fsp=1 c=0 T=oc4 r=0");
390 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc512ow196kw1pw0", "p=x3 fsp=1 c=0 T=oc4osp4 r=0");
391 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic128ih28oc128oh28kh3ph1", "p=x3 fsp=1 c=0 T=oc2osp4 r=0");
392 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1", "p=x3 fsp=1 c=0 T=oc2osp4 r=0");
393 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic128iw784oc512ow784kw1pw0 post_ops=sum", "p=x0 fsp=0 c=0 T=oc2 r=0");
394 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic2048iw49oc512ow49kw1pw0", "p=x3 fsp=1 c=0 T=oc4mb8 r=0 stg=1");
395 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256ih14oc256oh14kh3ph1", "p=x3 fsp=1 c=0 T=oc2osp4 r=0");
396 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1", "p=x3 fsp=0 c=0 T=oc4ow2 r=0");
397 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0", "p=x3 fsp=0 c=0 T=oc4mb2 r=0");
398 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256iw196oc1024ow196kw1pw0 post_ops=sum", "p=x3 fsp=0 c=0 T=oc4 r=0");
399 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc128ow3136kw1pw0", "p=x0 fsp=0 c=0 T=mb2 r=0 stg=1");
400 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc64ow3136kw1pw0", "p=x0 fsp=0 c=0 T= r=0");
401 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3", "p=x0 c=0 T= r=0");
402 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1", "p=x3 fsp=0 c=0 T=oc4ow8 r=0");
403 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0", "p=x3 fsp=1 c=0 T=oc4osp4 r=0");
404 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512ih7oc512oh7kh3ph1", "p=x3 fsp=1 c=0 T=oc2mb8 r=0 stg=1");
405 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512iw49oc2048ow49kw1pw0 post_ops=sum", "p=x3 fsp=0 c=0 T=oc4mb8 r=0 stg=1");
406 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc128ow784kw1pw0", "p=x3 fsp=1 c=0 T=mb2oc2 r=0 stg=1");
407 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc256ow784kw1pw0", "p=x2 fsp=0 c=0 T=oc4ow2 r=0");
408 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64ih56oc64oh56kh3ph1", "p=x3 fsp=1 c=0 T=osp4 r=0");
409 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0 post_ops=sum", "p=x0 fsp=0 c=0 T=mb2 r=0");
410 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0", "p=x1 fsp=1 c=0 T=oc2osp8 r=0");
411 add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc64ow3136kw1pw0", "p=x0 fsp=0 c=0 T=ow2 r=0");
412 // clang-format on
413}
414
415const char *conv_config_lookup_table_t::find(const conv_config_t &cfg) const {
416 auto key = cfg.prb().desc_str(/*print_mb=*/false);
417 auto it = map_.find(key);
418 if (it == map_.end()) return nullptr;
419 for (auto &e : it->second) {
420 if (e.filter.matches(cfg.prb(), cfg.hw_cfg())) return e.s_params;
421 }
422 return nullptr;
423}
424
425void conv_config_lookup_table_t::add(const char *s_prb, const char *s_params) {
426 conv_problem_filter_t filter(s_prb);
427 map_[filter.key()].push_back(entry_t {filter, s_params});
428}
429
430} // namespace jit
431} // namespace gpu
432} // namespace impl
433} // namespace dnnl
434