namespace dnnl {
namespace impl {
namespace gpu {
namespace ocl {
// Auto-generated embedding of the OpenCL kernel source for the RNN weights
// reorder. The embedded kernel (wei_reorder) quantizes f32 input weights to
// s8 output (convert_char_sat_rte of value*scale), using either a
// per-(d3,d4) scale or a single scale[0] depending on MASK, and stores a
// per-(d0,d1,d3,d4) float "compensation" value (the integer sum of the
// quantized values over dim 2) into a float array placed just past the s8
// output and aligned up to sizeof(float).
// NOTE(review): comment text inside the embedded kernel appears to have had
// "//" sequences stripped by the source-embedding generator (e.g. the Apache
// license URL on the "http:" line below) — presumed harmless since it only
// affects OpenCL comments; confirm against the generator if regenerating.
const char *rnn_reorder_kernel = R"==(/******************************************************************************* )==" "\n"
R"==(* Copyright 2019-2022 Intel Corporation )==" "\n"
R"==(* )==" "\n"
R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==" "\n"
R"==(* you may not use this file except in compliance with the License. )==" "\n"
R"==(* You may obtain a copy of the License at )==" "\n"
R"==(* )==" "\n"
R"==(* http: )==" "\n"
R"==(* )==" "\n"
R"==(* Unless required by applicable law or agreed to in writing, software )==" "\n"
R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==" "\n"
R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==" "\n"
R"==(* See the License for the specific language governing permissions and )==" "\n"
R"==(* limitations under the License. )==" "\n"
R"==(*******************************************************************************/ )==" "\n"
R"==(#define DT_UNDEF )==" "\n"
R"==(#include "gpu/ocl/ocl_types.h" )==" "\n"
R"==(#if IN_TYPE_F16 || OUT_TYPE_F16 )==" "\n"
R"==(#pragma OPENCL EXTENSION cl_khr_fp16 : enable )==" "\n"
R"==(#endif )==" "\n"
R"==(#define IN_OFF(x0, x1, x2, x3, x4, x5) \ )==" "\n"
R"==((((x0) % SRC_B0) * SRC_SB0 + ((x0) / SRC_B0) * SRC_S0 \ )==" "\n"
R"==(+ ((x1) % SRC_B1) * SRC_SB1 + ((x1) / SRC_B1) * SRC_S1 \ )==" "\n"
R"==(+ ((x2) % SRC_B2) * SRC_SB2 + ((x2) / SRC_B2) * SRC_S2 \ )==" "\n"
R"==(+ ((x3) % SRC_B3) * SRC_SB3 + ((x3) / SRC_B3) * SRC_S3 \ )==" "\n"
R"==(+ ((x4) % SRC_B4) * SRC_SB4 + ((x4) / SRC_B4) * SRC_S4 \ )==" "\n"
R"==(+ ((x5) % SRC_B5) * SRC_SB5 + ((x5) / SRC_B5) * SRC_S5) )==" "\n"
R"==(#define OUT_OFF(x0, x1, x2, x3, x4, x5) \ )==" "\n"
R"==((((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \ )==" "\n"
R"==(+ ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1 \ )==" "\n"
R"==(+ ((x2) % DST_B2) * DST_SB2 + ((x2) / DST_B2) * DST_S2 \ )==" "\n"
R"==(+ ((x3) % DST_B3) * DST_SB3 + ((x3) / DST_B3) * DST_S3 \ )==" "\n"
R"==(+ ((x4) % DST_B4) * DST_SB4 + ((x4) / DST_B4) * DST_S4 \ )==" "\n"
R"==(+ ((x5) % DST_B5) * DST_SB5 + ((x5) / DST_B5) * DST_S5) )==" "\n"
R"==(#if IN_TYPE_F32 )==" "\n"
R"==(#define DT_IN float )==" "\n"
R"==(#else )==" "\n"
R"==(#error Unimplemented )==" "\n"
R"==(#endif )==" "\n"
R"==(#if OUT_TYPE_S8 )==" "\n"
R"==(#define DT_OUT char )==" "\n"
R"==(#else )==" "\n"
R"==(#error Unimplemented )==" "\n"
R"==(#endif )==" "\n"
R"==(#if OUT_TYPE_S8 )==" "\n"
R"==(#define CONVERT_F32_TO_OUT convert_char_sat_rte )==" "\n"
R"==(#define CONVERT_F32_TO_OUT8 convert_char8_sat_rte )==" "\n"
R"==(#else )==" "\n"
R"==(#error Unimplemented )==" "\n"
R"==(#endif )==" "\n"
R"==(#define CONVERT_IN_TO_OUT(x) CONVERT_F32_TO_OUT(x) )==" "\n"
R"==(#define QZ_B0(v, scale) CONVERT_F32_TO_OUT(v *scale) )==" "\n"
R"==(#define REORDER(_out, _in, _s) \ )==" "\n"
R"==(do { \ )==" "\n"
R"==(_out = QZ_B0(_in, _s); \ )==" "\n"
R"==(} while (0) )==" "\n"
R"==(#define COMP_DT float )==" "\n"
R"==(#define COMP_DST_OFFSET_EL (DST_D0 * DST_S0) )==" "\n"
R"==(#define COMP_OFF(i0, i1, i2, i3) \ )==" "\n"
R"==(((((i0) * (DST_D1) + (i1)) * (DST_D3) + (i2)) * (DST_D4) + (i3)) )==" "\n"
R"==(KERNEL_ATTR )==" "\n"
R"==(__kernel void wei_reorder(__global DT_IN *input, __global DT_IN *scales, )==" "\n"
R"==(__global DT_OUT *output) { )==" "\n"
R"==(__global char *temp = (__global char *)(output + COMP_DST_OFFSET_EL); )==" "\n"
R"==(__global COMP_DT *comp )==" "\n"
R"==(= (__global COMP_DT *)(((unsigned long)temp + (sizeof(COMP_DT) - 1)) )==" "\n"
R"==(& -sizeof(COMP_DT)); )==" "\n"
R"==(#if REF_REORDER )==" "\n"
R"==(const int d0 = GWS_GET_D0(); )==" "\n"
R"==(const int d1 = GWS_GET_D1(); )==" "\n"
R"==(const int d3 = GWS_GET_D3(); )==" "\n"
R"==(const int d4 = GWS_GET_D4(); )==" "\n"
R"==(#if MASK )==" "\n"
R"==(float s = scales[d3 * SRC_D4 + d4]; )==" "\n"
R"==(#else )==" "\n"
R"==(float s = scales[0]; )==" "\n"
R"==(#endif )==" "\n"
R"==(int reduction = 0; )==" "\n"
R"==(for (int d2 = 0; d2 < SRC_D2; ++d2) { )==" "\n"
R"==(const int in_off = IN_OFF(d0, d1, d2, d3, d4, 0); )==" "\n"
R"==(const int out_off = OUT_OFF(d0, d1, d2, d3, d4, 0); )==" "\n"
R"==(REORDER(output[out_off], input[in_off], s); )==" "\n"
R"==(reduction += convert_int(QZ_B0(input[in_off], s)); )==" "\n"
R"==(} )==" "\n"
R"==(comp[COMP_OFF(d0, d1, d3, d4)] = convert_float(reduction); )==" "\n"
R"==(#else )==" "\n"
R"==(#error Unimplemented )==" "\n"
R"==(#endif )==" "\n"
R"==(} )==" "\n"
R"==()==" ;
}
}
}
}