1 | /******************************************************************************* |
2 | * Copyright 2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include "gpu/jit/conv/config_lookup_table.hpp" |
18 | |
19 | #include <string> |
20 | #include <vector> |
21 | #include <unordered_map> |
22 | |
23 | #include "gpu/jit/conv/config.hpp" |
24 | #include "gpu/jit/utils/utils.hpp" |
25 | |
26 | namespace dnnl { |
27 | namespace impl { |
28 | namespace gpu { |
29 | namespace jit { |
30 | |
31 | ngen::HW to_hw(const std::string &s) { |
32 | using namespace ir_utils; |
33 | #define CASE(name) \ |
34 | if (s == #name || s == to_lower(#name)) return ngen::HW::name; |
35 | CASE(XeHP) |
36 | CASE(XeHPG) |
37 | CASE(XeHPC) |
38 | #undef CASE |
39 | ir_error_not_expected(); |
40 | return ngen::HW::Unknown; |
41 | } |
42 | |
43 | int_filter_t::int_filter_t(const std::string &s) { |
44 | cmp_op_ = op_kind_t::_eq; |
45 | if (s.empty()) { |
46 | value_ = 0; |
47 | return; |
48 | } |
49 | auto end = s.size(); |
50 | auto last = s[end - 1]; |
51 | if (last == '+') { |
52 | cmp_op_ = op_kind_t::_ge; |
53 | end--; |
54 | } |
55 | value_ = std::stoi(s.substr(0, end)); |
56 | } |
57 | |
58 | bool int_filter_t::matches(int value) const { |
59 | switch (cmp_op_) { |
60 | case op_kind_t::_eq: return value == value_; |
61 | case op_kind_t::_le: return value <= value_; |
62 | case op_kind_t::_ge: return value >= value_; |
63 | case op_kind_t::_lt: return value < value_; |
64 | case op_kind_t::_gt: return value > value_; |
65 | default: ir_error_not_expected(); |
66 | } |
67 | return false; |
68 | } |
69 | |
70 | type_filter_t::type_filter_t(const std::string &s) { |
71 | for (size_t pos = 0;;) { |
72 | bool found = false; |
73 | for (auto &p : all_patterns()) { |
74 | if (try_parse(s, pos, p)) { |
75 | found = true; |
76 | break; |
77 | } |
78 | } |
79 | if (!found) { |
80 | ir_assert(pos == s.size()) << s; |
81 | break; |
82 | } |
83 | } |
84 | } |
85 | |
86 | bool type_filter_t::matches(const std::vector<data_type_t> &values) const { |
87 | ir_assert(values.size() == patterns_.size()); |
88 | for (size_t i = 0; i < values.size(); i++) { |
89 | auto &ptrn = patterns_[i]; |
90 | if (ptrn == "*" ) continue; |
91 | if (ptrn == "x8" ) { |
92 | if (!utils::one_of(values[i], data_type::s8, data_type::u8)) |
93 | return false; |
94 | } else if (ptrn == "f32" ) { |
95 | if (values[i] != data_type::f32) return false; |
96 | } else if (ptrn == "bf16" ) { |
97 | if (values[i] != data_type::bf16) return false; |
98 | } else if (ptrn == "f16" ) { |
99 | if (values[i] != data_type::f16) return false; |
100 | } else { |
101 | ir_error_not_expected() << ptrn; |
102 | } |
103 | } |
104 | return true; |
105 | } |
106 | |
107 | bool type_filter_t::try_parse( |
108 | const std::string &s, size_t &pos, const std::string &pattern) { |
109 | if (pos + pattern.size() > s.size()) return false; |
110 | if (!std::equal(pattern.begin(), pattern.end(), s.begin() + pos)) |
111 | return false; |
112 | patterns_.push_back(s.substr(pos, pattern.size())); |
113 | pos = pos + pattern.size(); |
114 | return true; |
115 | } |
116 | |
117 | std::vector<std::string> &type_filter_t::all_patterns() { |
118 | static std::vector<std::string> ret = { |
119 | "x8" , |
120 | "bf16" , |
121 | "f16" , |
122 | "f32" , |
123 | "*" , |
124 | }; |
125 | return ret; |
126 | } |
127 | conv_problem_filter_t::conv_problem_filter_t(const std::string &s) { |
128 | auto parts = ir_utils::split(s, " " ); |
129 | for (auto &part : parts) { |
130 | auto sub_parts = ir_utils::split(part, "=" ); |
131 | ir_assert(sub_parts.size() == 2) << part; |
132 | auto &name = sub_parts[0]; |
133 | auto &value = sub_parts[1]; |
134 | if (name == "hw" ) { |
135 | hw_ = to_hw(value); |
136 | } else if (name == "cfg" ) { |
137 | type_filter_ = type_filter_t(value); |
138 | } else if (name == "dir" ) { |
139 | dir_ = value; |
140 | } else if (name == "desc" ) { |
141 | desc_ = value; |
142 | } else if (name == "mb" ) { |
143 | mb_filter_ = int_filter_t(value); |
144 | } else if (name == "post_ops" ) { |
145 | post_ops_ = value; |
146 | } else { |
147 | ir_error_not_expected() << part; |
148 | } |
149 | } |
150 | } |
151 | |
152 | bool conv_problem_filter_t::matches( |
153 | const conv_problem_t &prb, const hw_config_t &hw_cfg) const { |
154 | if (hw_cfg.hw() != hw_) return false; |
155 | if (!matches_dir(prb)) return false; |
156 | if (!type_filter_.matches( |
157 | {prb.src_data_type, prb.wei_data_type, prb.dst_data_type})) |
158 | return false; |
159 | if (!fpmath_filter_.matches(prb.fpmath_mode)) return false; |
160 | if (!mb_filter_.matches(prb.mb)) return false; |
161 | if (!matches_desc(prb)) return false; |
162 | if (!matches_post_ops(prb)) return false; |
163 | return true; |
164 | } |
165 | |
166 | bool conv_problem_filter_t::matches_dir(const conv_problem_t &prb) const { |
167 | if (dir_.empty()) return true; |
168 | if (dir_ == "fwd" ) { |
169 | return prb.is_fwd; |
170 | } else if (dir_ == "bwd_d" ) { |
171 | return prb.is_bwd_d; |
172 | } else if (dir_ == "bwd_w" ) { |
173 | return prb.is_bwd_w; |
174 | } else { |
175 | ir_error_not_expected() << dir_; |
176 | } |
177 | return false; |
178 | } |
179 | |
180 | bool conv_problem_filter_t::matches_desc(const conv_problem_t &prb) const { |
181 | return prb.desc_str(/*print_mb=*/false) == desc_; |
182 | } |
183 | |
184 | bool conv_problem_filter_t::matches_post_ops(const conv_problem_t &prb) const { |
185 | if (post_ops_ == "*" ) return true; |
186 | if (post_ops_ == "sum" ) return prb.with_sum; |
187 | ir_assert(post_ops_.empty()) << post_ops_; |
188 | return !prb.with_sum; |
189 | } |
190 | |
191 | conv_config_lookup_table_t::conv_config_lookup_table_t() { |
192 | // clang-format off |
193 | // wdsr |
194 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih240iw135oc32oh240ow135kh3kw3ph1pw1" , "fsp=1" ); |
195 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic32ih240iw135oc128oh240ow135kh3kw3ph1pw1" , "fsp=1" ); |
196 | // kuaishou noisy |
197 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih56oc128oh56kh3ph1 post_ops=*" , "fsp=1" ); |
198 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih56oc256oh28kh1sh2ph0" , "fsp=1" ); |
199 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic128ih56oc256oh28kh3sh2ph1" , "fsp=1" ); |
200 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic256ih28oc512oh14kh3sh2ph1" , "fsp=1" ); |
201 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic3ih448oc64oh224kh7sh2ph3" , "fsp=1" ); |
202 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic512ih14oc512oh14kh3ph1 post_ops=*" , "fsp=1" ); |
203 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic64ih112oc128oh56kh1sh2ph0" , "fsp=1" ); |
204 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic64ih112oc128oh56kh3sh2ph1" , "fsp=1" ); |
205 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic64ih112oc64oh112kh3ph1" , "fsp=1" ); |
206 | // kuaishou block |
207 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic512ih28iw21oc512oh28ow21kh3kw3ph1pw1 post_ops=*" , "fsp=1" ); |
208 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic256ih56iw42oc256oh56ow42kh3kw3ph1pw1 post_ops=*" , "fsp=1" ); |
209 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic3ih896iw672oc64oh448ow336kh7kw7sh2sw2ph3pw3" , "fsp=1" ); |
210 | // kuaishou blur |
211 | // |
212 | add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic768iw900oc160ow900kw1pw0" , "fsp=1" ); |
213 | add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic768iw900oc192ow900kw1pw0" , "fsp=1" ); |
214 | add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic768ih30oc192oh30kh1ph0" , "fsp=1" ); |
215 | add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic288ih61oc384oh30kh3sh2ph0" , "fsp=1" ); |
216 | add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic192iw3721oc64ow3721kw1pw0" , "fsp=1" ); |
217 | add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic192iw3721oc48ow3721kw1pw0" , "fsp=1" ); |
218 | add("hw=xehpg dir=fwd cfg=x8x8x8 mb=1+ desc=ic32ih255oc32oh253kh3ph0" , "fsp=1" ); |
219 | // |
220 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic768iw900oc160ow900kw1pw0" , "fsp=1" ); |
221 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic768iw900oc192ow900kw1pw0" , "fsp=1" ); |
222 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic768ih30oc192oh30kh1ph0" , "fsp=1" ); |
223 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic288ih61oc384oh30kh3sh2ph0" , "fsp=1" ); |
224 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic192iw3721oc64ow3721kw1pw0" , "fsp=1" ); |
225 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic192iw3721oc48ow3721kw1pw0" , "fsp=1" ); |
226 | add("hw=xehpg dir=fwd cfg=f16f16f16 mb=1+ desc=ic32ih255oc32oh253kh3ph0" , "fsp=1" ); |
227 | |
228 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic2048iw49oc512ow49kw1pw0" , "T=oc8mb1osp4 s=x3.g2.v4 c=0" ); |
229 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic128iw784oc512ow784kw1pw0 post_ops=sum" , "T=oc8mb1osp4 s=x2.g1.v1 c=0" ); |
230 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1" , "T=oc8mb1osp4 s=x3.g2.v4 c=0" ); |
231 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0" , "T=oc2mb1osp4 s=x3.g2.v2 c=0" ); |
232 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "T=oc8mb1osp4 s=x3.g2.v4 c=0" ); |
233 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "T=oc8mb4osp1 s=x3.g2.v4 c=0" ); |
234 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256iw196oc1024ow196kw1pw0 post_ops=sum" , "T=oc8mb1osp4 s=x3.g2.v2 c=0" ); |
235 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc64ow3136kw1pw0" , "T=oc4mb1osp8 s=x1.g1.v0 c=0" ); |
236 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256ih14oc256oh14kh3ph1" , "T=oc8mb4osp1 s=x3.g1.v4 c=0" ); |
237 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512iw49oc2048ow49kw1pw0 post_ops=sum" , "T=oc8mb4osp1 s=x3.g1.v2 c=0" ); |
238 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64ih56oc64oh56kh3ph1" , "T=oc2mb1osp8 s=x3.g1.v4 c=0" ); |
239 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic128ih28oc128oh28kh3ph1" , "T=oc4mb1osp8 s=x3.g1.v4 c=0" ); |
240 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc256ow196kw1pw0" , "T=oc8mb1osp4 s=x3.g1.v3 c=0" ); |
241 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1" , "T=oc8mb4osp1 s=x3.g1.v4 c=0" ); |
242 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc64ow3136kw1pw0" , "T=oc4mb1osp8 s=x1.g1.v0 c=0" ); |
243 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512ih7oc512oh7kh3ph1" , "T=oc4mb4osp1 s=x3.g1.v4 c=0" ); |
244 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1" , "T=oc4mb1osp8 s=x3.g1.v4 c=0" ); |
245 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0" , "T=oc8mb1osp4 s=x3.g2.v2 c=0" ); |
246 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3" , "T=oc2mb1ow8 s=x3.g1.v2 c=0" ); |
247 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc128ow784kw1pw0" , "T=oc4mb1osp2 s=x3.g1.v4 c=0" ); |
248 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0 post_ops=sum" , "T=oc4mb1osp2 s=x3.g2.v4 c=0" ); |
249 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc512ow196kw1pw0" , "T=oc8mb1osp4 s=x3.g2.v4 c=0" ); |
250 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc128ow3136kw1pw0" , "T=oc4mb1osp2 s=x3.g1.v4 c=0" ); |
251 | add("hw=xehpg dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc256ow784kw1pw0" , "T=oc8mb1osp4 s=x3.g2.v3 c=0" ); |
252 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "simd=32 p=x0 T=ic4oc8mb1 l=oc16 i=mb16ic32oc16" ); |
253 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic1024iw196oc256ow196kw1pw0" , "simd=32 p=x0 T=ic2oc2iw2 l=oc8 i=mb16ic32oc16" ); |
254 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic1024iw196oc512ow196kw1pw0" , "simd=32 p=x0 T=oc4iw2 l=oc8 i=mb16ic32oc16" ); |
255 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic128ih28oc128oh28kh3ph1" , "simd=16 p=x0 T= i=mb16ic16oc16" ); |
256 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic128ih56oc128oh28kh3sh2ph1" , "simd=16 p=x0 T=ic2mb4 i=mb16ic16oc16" ); |
257 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic128iw784oc512ow784kw1pw0" , "simd=32 p=x0 T=oc4 l=oc8 i=mb8ic32oc16" ); |
258 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic2048iw49oc512ow49kw1pw0" , "simd=32 p=x0 T=ic2oc4 l=oc8 i=mb16ic32oc16" ); |
259 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256ih14oc256oh14kh3ph1" , "simd=32 p=x0 T=oc2iw2 l=oc8kw3kh3 i=mb8ic32oc16" ); |
260 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256ih28oc256oh14kh3sh2ph1" , "simd=32 p=x0 T=ic2oc2iw2 l=oc8kw3kh3 i=mb16ic32oc16" ); |
261 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256ih56oc512oh28kh1sh2ph0" , "simd=32 p=x0 T=oc2iw2 l=oc16 i=mb16ic32oc16" ); |
262 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256iw196oc1024ow196kw1pw0" , "simd=32 p=x0 T=oc2 l=oc32 i=mb8ic32oc16" ); |
263 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256iw3136oc128ow3136kw1pw0" , "simd=16 p=x0 T=ic2 i=mb16ic16oc16" ); |
264 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic256iw3136oc64ow3136kw1pw0" , "simd=32 p=x0 T= i=mb16ic32oc16" ); |
265 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512ih14oc512oh7kh3sh2ph1" , "simd=32 p=x0 T=ic2oc4iw2 l=oc8kw3kh3 i=mb16ic32oc16" ); |
266 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "simd=32 p=x0 T=oc4iw8 l=oc16 i=mb16ic32oc16" ); |
267 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512ih7oc512oh7kh3ph1" , "simd=32 p=x0 T=oc2iw8 l=oc16kw3kh3 i=mb8ic32oc16" ); |
268 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512iw49oc2048ow49kw1pw0" , "simd=16 p=x1 T=ic2mb2 i=mb8ic16oc16" ); |
269 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512iw784oc128ow784kw1pw0" , "simd=16 p=x0 T=ic2 i=mb16ic16oc16" ); |
270 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic512iw784oc256ow784kw1pw0" , "simd=32 p=x0 T=oc2 l=oc8 i=mb16ic32oc16" ); |
271 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic64ih56oc64oh56kh3ph1" , "simd=32 p=x0 T=iw2 i=mb8ic32oc16" ); |
272 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic64iw3136oc256ow3136kw1pw0" , "simd=32 p=x0 T=oc2 l=oc8 i=mb8ic32oc16" ); |
273 | add("hw=xehpc dir=bwd_d cfg=f32f32f32 mb=16+ desc=ic64iw3136oc64ow3136kw1pw0" , "simd=16 p=x0 T=ic2 i=mb16ic16oc16" ); |
274 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16" ); |
275 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic1024iw196oc256ow196kw1pw0" , "simd=16 p=x1 T=ic4oc2 i=ic16oc16mb16" ); |
276 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic1024iw196oc512ow196kw1pw0" , "simd=16 p=x1 T=ic4oc4 i=ic16oc16mb16" ); |
277 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic128ih28oc128oh28kh3ph1" , "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16" ); |
278 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic128ih56oc128oh28kh3sh2ph1" , "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16" ); |
279 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic128iw784oc512ow784kw1pw0" , "simd=16 p=x1 T=ic2oc8 i=ic16oc16mb16" ); |
280 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic2048iw49oc512ow49kw1pw0" , "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16" ); |
281 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256ih14oc256oh14kh3ph1" , "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16" ); |
282 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256ih28oc256oh14kh3sh2ph1" , "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16" ); |
283 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256ih56oc512oh28kh1sh2ph0" , "simd=16 p=x1 T=ic8oc4 i=ic16oc16mb16" ); |
284 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256iw196oc1024ow196kw1pw0" , "simd=16 p=x1 T=ic2oc2 i=ic16oc16mb16" ); |
285 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256iw3136oc128ow3136kw1pw0" , "simd=16 p=x0 T=ic4oc4 i=ic16oc16mb16" ); |
286 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic256iw3136oc64ow3136kw1pw0" , "simd=16 p=x1 T=ic2oc4 i=ic16oc16mb16" ); |
287 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic3ih224oc64oh112kh7sh2ph3" , "simd=16 p=x0 l=ow28 T=oc2 i=kw8mb16oc16" ); |
288 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512ih14oc512oh7kh3sh2ph1" , "simd=16 p=x0 T=ic2oc2 i=ic16oc16mb16" ); |
289 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16" ); |
290 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512ih7oc512oh7kh3ph1" , "simd=16 p=x0 T=ic4 i=ic16oc16mb16" ); |
291 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512iw49oc2048ow49kw1pw0" , "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16" ); |
292 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512iw784oc128ow784kw1pw0" , "simd=16 p=x1 T=ic2oc4 i=ic16oc16mb16" ); |
293 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic512iw784oc256ow784kw1pw0" , "simd=16 p=x0 T=ic2oc4 i=ic16oc16mb16" ); |
294 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic64ih56oc64oh56kh3ph1" , "simd=16 p=x0 T=ic4oc2 i=ic16oc16mb16" ); |
295 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic64iw3136oc256ow3136kw1pw0" , "simd=16 p=x1 T=ic2oc8 i=ic16oc16mb16" ); |
296 | add("hw=xehpc dir=bwd_w cfg=f32f32f32 mb=16+ desc=ic64iw3136oc64ow3136kw1pw0" , "simd=16 p=x1 T=ic4oc2 i=ic16oc16mb16" ); |
297 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "simd=32 p=x0 fsp=0 T=ic2oc4 l=ic32 i=mb16oc32ic16" ); |
298 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic1024iw196oc256ow196kw1pw0" , "simd=32 p=x0 fsp=0 T=ic2 l=ic32 i=mb8oc32ic16" ); |
299 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic1024iw196oc512ow196kw1pw0" , "simd=32 p=x0 fsp=0 T=ic4oc2ow2 l=ic16 i=mb16oc32ic16" ); |
300 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic128ih28oc128oh28kh3ph1" , "simd=16 p=x0 fsp=0 T=ow4 i=mb16oc16ic16" ); |
301 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic128ih56oc128oh28kh3sh2ph1" , "simd=16 p=x0 fsp=0 T= i=mb16oc16ic16" ); |
302 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic128iw784oc512ow784kw1pw0" , "simd=32 p=x0 fsp=0 T= i=mb8oc32ic16" ); |
303 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic2048iw49oc512ow49kw1pw0" , "simd=32 p=x0 fsp=0 T=ic2ow8 l=ic64 i=mb8oc32ic16" ); |
304 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256ih14oc256oh14kh3ph1" , "simd=32 p=x0 fsp=0 T=ic2mb2 l=ic8kw3kh3 i=mb8oc32ic16" ); |
305 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256ih28oc256oh14kh3sh2ph1" , "simd=32 p=x0 fsp=0 T=ic2ow2 l=ic8kw3kh3 i=mb8oc32ic16" ); |
306 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256ih56oc512oh28kh1sh2ph0" , "simd=32 p=x0 fsp=0 T=ic2 l=ic8 i=mb16oc32ic16" ); |
307 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256iw196oc1024ow196kw1pw0" , "simd=32 p=x0 fsp=0 T=ic2oc2ow2 l=ic8 i=mb16oc32ic16" ); |
308 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256iw3136oc128ow3136kw1pw0" , "simd=32 p=x0 fsp=0 T=ic2 l=ic8 i=mb8oc32ic16" ); |
309 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic256iw3136oc64ow3136kw1pw0" , "simd=16 p=x1 fsp=0 T=ic2oc2 l=ic8 i=mb16oc16ic16" ); |
310 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic3ih224oc64oh112kh7sh2ph3" , "simd=32 p=x1 T= l=kh7 i=ow16oc32ic3kw7" ); |
311 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512ih14oc512oh7kh3sh2ph1" , "simd=32 p=x0 fsp=0 T=ic2ow8 l=ic16kw3kh3 i=mb8oc32ic16" ); |
312 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "simd=32 p=x0 fsp=0 T=ic4ow2 l=ic8 i=mb16oc32ic16" ); |
313 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512ih7oc512oh7kh3ph1" , "simd=32 p=x0 fsp=0 T=ic2ow8 l=ic16kw3kh3 i=mb8oc32ic16" ); |
314 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512iw49oc2048ow49kw1pw0" , "simd=32 p=x0 fsp=0 T=ic4oc2 l=ic8 i=mb16oc32ic16" ); |
315 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512iw784oc128ow784kw1pw0" , "simd=32 p=x0 fsp=0 T=ic4ow2 l=ic8 i=mb16oc32ic16" ); |
316 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic512iw784oc256ow784kw1pw0" , "simd=32 p=x0 fsp=0 T=ic4 l=ic8 i=mb16oc32ic16" ); |
317 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic64ih56oc64oh56kh3ph1" , "simd=32 p=x0 fsp=0 T=ow2 i=mb8oc32ic16" ); |
318 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic64iw3136oc256ow3136kw1pw0" , "simd=32 p=x0 fsp=0 T= i=mb16oc32ic16" ); |
319 | add("hw=xehpc dir=fwd cfg=f32f32f32 mb=16+ desc=ic64iw3136oc64ow3136kw1pw0" , "simd=16 p=x0 fsp=0 T=oc2 i=mb16oc16ic16" ); |
320 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "p=x3 c=0 P=u T=ic4iw4 r=0" ); |
321 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc256ow196kw1pw0" , "p=x3 c=0 P=u T=ic4iw4 r=0" ); |
322 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc512ow196kw1pw0" , "p=x3 c=0 P=u T=ic4iw8 r=0" ); |
323 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic128ih28oc128oh28kh3ph1" , "p=x3 c=0 P=u T=ic2iw4 r=0" ); |
324 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1" , "p=x2 c=0 P=u T=ic2iw4 r=0" ); |
325 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic128iw784oc512ow784kw1pw0" , "p=x3 c=0 P=u T=ic2iw2 r=0" ); |
326 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic2048iw49oc512ow49kw1pw0" , "p=x3 c=0 P=u T=ic4iw8 r=0" ); |
327 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256ih14oc256oh14kh3ph1" , "p=x3 c=0 P=u T=ic4iw8 r=0" ); |
328 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1" , "p=x3 c=0 P=u T=ic4iw8 r=0" ); |
329 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0" , "p=x3 c=0 P=u T=ic4mb2 r=0" ); |
330 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256iw196oc1024ow196kw1pw0" , "p=x3 c=0 P=u T=ic4iw4 r=0" ); |
331 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc128ow3136kw1pw0" , "p=x2 c=0 P= T=ic4mb4 r=0" ); |
332 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc64ow3136kw1pw0" , "p=x0 c=0 P= T=ic2mb4 r=0" ); |
333 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1" , "p=x3 c=0 P=u T=ic4iw4 r=0" ); |
334 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "p=x3 c=0 P=u T=ic4iw4 r=0" ); |
335 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512ih7oc512oh7kh3ph1" , "p=x3 c=0 P=u T=ic4iw8 r=0" ); |
336 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512iw49oc2048ow49kw1pw0" , "p=x3 c=0 P=u T=ic4iw8 r=0" ); |
337 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc128ow784kw1pw0" , "p=x3 c=0 P=u T=ic4iw2 r=0" ); |
338 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc256ow784kw1pw0" , "p=x3 c=0 P=u T=ic8iw2 r=0" ); |
339 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic64ih56oc64oh56kh3ph1" , "p=x2 c=0 P=u T=iw8 r=0" ); |
340 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc256ow3136kw1pw0" , "p=x3 c=0 P=u T=ic2mb4 r=0" ); |
341 | add("hw=xehpc dir=bwd_d cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc64ow3136kw1pw0" , "p=x0 c=0 P=u T=iw2 r=0" ); |
342 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
343 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic1024iw196oc256ow196kw1pw0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
344 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic1024iw196oc512ow196kw1pw0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
345 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic128ih28oc128oh28kh3ph1" , "p=x3 c=0 P=u T=ic4oc2 r=0" ); |
346 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1" , "p=x3 c=0 P=u T=ic4oc2 r=0" ); |
347 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic128iw784oc512ow784kw1pw0" , "p=x3 c=0 P=u T=ic4oc4 r=0" ); |
348 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic2048iw49oc512ow49kw1pw0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
349 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256ih14oc256oh14kh3ph1" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
350 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
351 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
352 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256iw196oc1024ow196kw1pw0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
353 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256iw3136oc128ow3136kw1pw0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
354 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic256iw3136oc64ow3136kw1pw0" , "p=x1 c=0 P=u T=ic8 r=0" ); |
355 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3" , "p=x1 c=0 P=u T= r=0" ); |
356 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
357 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
358 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512ih7oc512oh7kh3ph1" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
359 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512iw49oc2048ow49kw1pw0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
360 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512iw784oc128ow784kw1pw0" , "p=x3 c=0 P=u T=ic8oc2 r=0" ); |
361 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic512iw784oc256ow784kw1pw0" , "p=x3 c=0 P=u T=ic8oc4 r=0" ); |
362 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic64ih56oc64oh56kh3ph1" , "p=x1 c=0 P=u T=ic2 r=0" ); |
363 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic64iw3136oc256ow3136kw1pw0" , "p=x3 c=0 P= T=ic2oc2 r=0" ); |
364 | add("hw=xehpc dir=bwd_w cfg=bf16*bf16 mb=128+ desc=ic64iw3136oc64ow3136kw1pw0" , "p=x1 c=0 P= T=ic2 r=0" ); |
365 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0" ); |
366 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc256ow196kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc4ow4 r=0" ); |
367 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic1024iw196oc512ow196kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc2ow8 r=0" ); |
368 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic128ih28oc128oh28kh3ph1" , "p=x3 fsp=0 c=0 P=u T=oc2ow4 r=0" ); |
369 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1" , "p=x3 fsp=0 c=0 P=u T=oc2ow4 r=0" ); |
370 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic128iw784oc512ow784kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc4ow2 r=0" ); |
371 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic2048iw49oc512ow49kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0" ); |
372 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256ih14oc256oh14kh3ph1" , "p=x3 fsp=0 c=0 P=u T=oc4ow4 r=0" ); |
373 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1" , "p=x3 fsp=0 c=0 P=u T=oc4ow4 r=0" ); |
374 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0" , "p=x3 fsp=0 c=0 P=u T=oc8ow2 r=0" ); |
375 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256iw196oc1024ow196kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0" ); |
376 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc128ow3136kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc4mb2 r=0" ); |
377 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic256iw3136oc64ow3136kw1pw0" , "p=x0 fsp=0 c=0 P=u T= r=0" ); |
378 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3" , "p=x2 c=0 P=u T=ow4 r=0" ); |
379 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1" , "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0" ); |
380 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0" ); |
381 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512ih7oc512oh7kh3ph1" , "p=x3 fsp=0 c=0 P=u T=oc2ow8 r=0" ); |
382 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512iw49oc2048ow49kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc4ow8 r=0" ); |
383 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc128ow784kw1pw0" , "p=x2 fsp=0 c=0 P=u T=oc2 r=0" ); |
384 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic512iw784oc256ow784kw1pw0" , "p=x3 fsp=0 c=0 P=u T=oc4ow2 r=0" ); |
385 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic64ih56oc64oh56kh3ph1" , "p=x3 fsp=0 c=0 P=u T=ow8 r=0" ); |
386 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc256ow3136kw1pw0" , "p=x2 fsp=0 c=0 P=u T=oc2mb4 r=0" ); |
387 | add("hw=xehpc dir=fwd cfg=bf16bf16bf16 mb=128+ desc=ic64iw3136oc64ow3136kw1pw0" , "p=x0 fsp=0 c=0 P=u T=ow2 r=0" ); |
388 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic1024ih14oc2048oh7kh1sh2ph0" , "p=x3 fsp=0 c=0 T=oc2mb8 r=0 stg=1" ); |
389 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc256ow196kw1pw0" , "p=x3 fsp=1 c=0 T=oc4 r=0" ); |
390 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic1024iw196oc512ow196kw1pw0" , "p=x3 fsp=1 c=0 T=oc4osp4 r=0" ); |
391 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic128ih28oc128oh28kh3ph1" , "p=x3 fsp=1 c=0 T=oc2osp4 r=0" ); |
392 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic128ih56oc128oh28kh3sh2ph1" , "p=x3 fsp=1 c=0 T=oc2osp4 r=0" ); |
393 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic128iw784oc512ow784kw1pw0 post_ops=sum" , "p=x0 fsp=0 c=0 T=oc2 r=0" ); |
394 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic2048iw49oc512ow49kw1pw0" , "p=x3 fsp=1 c=0 T=oc4mb8 r=0 stg=1" ); |
395 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256ih14oc256oh14kh3ph1" , "p=x3 fsp=1 c=0 T=oc2osp4 r=0" ); |
396 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256ih28oc256oh14kh3sh2ph1" , "p=x3 fsp=0 c=0 T=oc4ow2 r=0" ); |
397 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256ih56oc512oh28kh1sh2ph0" , "p=x3 fsp=0 c=0 T=oc4mb2 r=0" ); |
398 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256iw196oc1024ow196kw1pw0 post_ops=sum" , "p=x3 fsp=0 c=0 T=oc4 r=0" ); |
399 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc128ow3136kw1pw0" , "p=x0 fsp=0 c=0 T=mb2 r=0 stg=1" ); |
400 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic256iw3136oc64ow3136kw1pw0" , "p=x0 fsp=0 c=0 T= r=0" ); |
401 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic3ih224oc64oh112kh7sh2ph3" , "p=x0 c=0 T= r=0" ); |
402 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512ih14oc512oh7kh3sh2ph1" , "p=x3 fsp=0 c=0 T=oc4ow8 r=0" ); |
403 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512ih28oc1024oh14kh1sh2ph0" , "p=x3 fsp=1 c=0 T=oc4osp4 r=0" ); |
404 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512ih7oc512oh7kh3ph1" , "p=x3 fsp=1 c=0 T=oc2mb8 r=0 stg=1" ); |
405 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512iw49oc2048ow49kw1pw0 post_ops=sum" , "p=x3 fsp=0 c=0 T=oc4mb8 r=0 stg=1" ); |
406 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc128ow784kw1pw0" , "p=x3 fsp=1 c=0 T=mb2oc2 r=0 stg=1" ); |
407 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic512iw784oc256ow784kw1pw0" , "p=x2 fsp=0 c=0 T=oc4ow2 r=0" ); |
408 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64ih56oc64oh56kh3ph1" , "p=x3 fsp=1 c=0 T=osp4 r=0" ); |
409 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0 post_ops=sum" , "p=x0 fsp=0 c=0 T=mb2 r=0" ); |
410 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc256ow3136kw1pw0" , "p=x1 fsp=1 c=0 T=oc2osp8 r=0" ); |
411 | add("hw=xehpc dir=fwd cfg=x8x8* mb=128+ desc=ic64iw3136oc64ow3136kw1pw0" , "p=x0 fsp=0 c=0 T=ow2 r=0" ); |
412 | // clang-format on |
413 | } |
414 | |
415 | const char *conv_config_lookup_table_t::find(const conv_config_t &cfg) const { |
416 | auto key = cfg.prb().desc_str(/*print_mb=*/false); |
417 | auto it = map_.find(key); |
418 | if (it == map_.end()) return nullptr; |
419 | for (auto &e : it->second) { |
420 | if (e.filter.matches(cfg.prb(), cfg.hw_cfg())) return e.s_params; |
421 | } |
422 | return nullptr; |
423 | } |
424 | |
425 | void conv_config_lookup_table_t::add(const char *s_prb, const char *s_params) { |
426 | conv_problem_filter_t filter(s_prb); |
427 | map_[filter.key()].push_back(entry_t {filter, s_params}); |
428 | } |
429 | |
430 | } // namespace jit |
431 | } // namespace gpu |
432 | } // namespace impl |
433 | } // namespace dnnl |
434 | |