1 | namespace dnnl { |
2 | namespace impl { |
3 | namespace gpu { |
4 | namespace ocl { |
5 | const char *xe_lp_gemm_nocopy_scale_x8x8s32_kernel = R"==(/******************************************************************************* )==" "\n" |
6 | R"==(* Copyright 2019-2022 Intel Corporation )==" "\n" |
7 | R"==(* )==" "\n" |
8 | R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==" "\n" |
9 | R"==(* you may not use this file except in compliance with the License. )==" "\n" |
10 | R"==(* You may obtain a copy of the License at )==" "\n" |
11 | R"==(* )==" "\n" |
12 | R"==(* http: )==" "\n" |
13 | R"==(* )==" "\n" |
14 | R"==(* Unless required by applicable law or agreed to in writing, software )==" "\n" |
15 | R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==" "\n" |
16 | R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==" "\n" |
17 | R"==(* See the License for the specific language governing permissions and )==" "\n" |
18 | R"==(* limitations under the License. )==" "\n" |
19 | R"==(*******************************************************************************/ )==" "\n" |
20 | R"==(#include "gpu/ocl/gemm/ocl_gemm_attrs.h" )==" "\n" |
21 | R"==(#include "gpu/ocl/ocl_post_ops.h" )==" "\n" |
22 | R"==(#include "gpu/ocl/ocl_types.h" )==" "\n" |
23 | R"==(#if WITH_ELTWISE == 1 )==" "\n" |
24 | R"==(#define POST_OP(val) \ )==" "\n" |
25 | R"==(do { \ )==" "\n" |
26 | R"==(if (apply_eltwise) \ )==" "\n" |
27 | R"==(val = fwd_eltwise( \ )==" "\n" |
28 | R"==(val, eltwise_alpha, eltwise_beta, eltwise_scale); \ )==" "\n" |
29 | R"==(} while (0) )==" "\n" |
30 | R"==(#else )==" "\n" |
31 | R"==(#define POST_OP(val) )==" "\n" |
32 | R"==(#endif )==" "\n" |
33 | R"==(kernel void xe_lp_gemm_scale_x8x8s32(global int *cc, global int *c, char trc, )==" "\n" |
34 | R"==(int offset_c, int m, int n, int ldc, global float *alpha, float beta, )==" "\n" |
35 | R"==(global int *co, int offset_co, int alpha_is_zero, int apply_eltwise, )==" "\n" |
36 | R"==(float eltwise_alpha, float eltwise_beta, float eltwise_scale) { )==" "\n" |
37 | R"==(int idx = get_group_id(0); )==" "\n" |
38 | R"==(int idy = get_group_id(1); )==" "\n" |
39 | R"==(int lid = get_local_id(0); )==" "\n" |
40 | R"==(int j; )==" "\n" |
41 | R"==(int offset_cc = 0; )==" "\n" |
42 | R"==(int offset_x = 0; )==" "\n" |
43 | R"==(int ldcc = m; )==" "\n" |
44 | R"==(m -= 32 * idx; )==" "\n" |
45 | R"==(if (m > 32) m = 32; )==" "\n" |
46 | R"==(n -= 16 * idy; )==" "\n" |
47 | R"==(if (n > 16) n = 16; )==" "\n" |
48 | R"==(m -= 32 * lid / 16; )==" "\n" |
49 | R"==(if ((m <= 0) || (n <= 0)) return; )==" "\n" |
50 | R"==(offset_cc = 32 * idx + 32 * lid / 16 + 16 * idy * ldcc; )==" "\n" |
51 | R"==(offset_c += 32 * idx + 32 * lid / 16 + 16 * idy * ldc; )==" "\n" |
52 | R"==(if (trc == 'C') offset_co += 32 * idx + 32 * lid / 16; )==" "\n" |
53 | R"==(if (trc == 'R') offset_co += 16 * idy; )==" "\n" |
54 | R"==(for (j = 0; j < n; j++) { )==" "\n" |
55 | R"==(if (m > 0) { )==" "\n" |
56 | R"==(float val = (alpha_is_zero ? 0 : ATTR_ALPHA) * cc[offset_cc + 0] )==" "\n" |
57 | R"==(+ beta * c[offset_c + 0]; )==" "\n" |
58 | R"==(POST_OP(val); )==" "\n" |
59 | R"==(c[offset_c] = convert_int_sat_rte( )==" "\n" |
60 | R"==(val + (co ? co[offset_co + offset_x] : 0)); )==" "\n" |
61 | R"==(if (trc == 'C') { offset_x++; } )==" "\n" |
62 | R"==(} )==" "\n" |
63 | R"==(if (m > 1) { )==" "\n" |
64 | R"==(float val = (alpha_is_zero ? 0 : ATTR_ALPHA) * cc[offset_cc + 1] )==" "\n" |
65 | R"==(+ beta * c[offset_c + 1]; )==" "\n" |
66 | R"==(POST_OP(val); )==" "\n" |
67 | R"==(c[offset_c + 1] = convert_int_sat_rte( )==" "\n" |
68 | R"==(val + (co ? co[offset_co + offset_x] : 0)); )==" "\n" |
69 | R"==(} )==" "\n" |
70 | R"==(offset_cc += ldcc; )==" "\n" |
71 | R"==(offset_c += ldc; )==" "\n" |
72 | R"==(if (trc == 'C') offset_x = 0; )==" "\n" |
73 | R"==(if (trc == 'R') offset_x++; )==" "\n" |
74 | R"==(} )==" "\n" |
75 | R"==(} )==" "\n" |
76 | R"==()==" ; |
77 | } |
78 | } |
79 | } |
80 | } |