1namespace dnnl {
2namespace impl {
3namespace gpu {
4namespace ocl {
5const char *ref_reduction_kernel = R"==(/******************************************************************************* )==""\n"
6R"==(* Copyright 2020-2022 Intel Corporation )==""\n"
7R"==(* )==""\n"
8R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==""\n"
9R"==(* you may not use this file except in compliance with the License. )==""\n"
10R"==(* You may obtain a copy of the License at )==""\n"
11R"==(* )==""\n"
12R"==(* http: )==""\n"
13R"==(* )==""\n"
14R"==(* Unless required by applicable law or agreed to in writing, software )==""\n"
15R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==""\n"
16R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==""\n"
17R"==(* See the License for the specific language governing permissions and )==""\n"
18R"==(* limitations under the License. )==""\n"
19R"==(*******************************************************************************/ )==""\n"
20R"==(#include "gpu/ocl/ocl_post_ops.h" )==""\n"
21R"==(#include "gpu/ocl/ocl_types.h" )==""\n"
22R"==(#if defined(IS_MAX) )==""\n"
23R"==(#define INIT_ACC TO_DEF_ACC_DATA_T(DATA_MIN) )==""\n"
24R"==(#elif defined(IS_MIN) )==""\n"
25R"==(#define INIT_ACC TO_DEF_ACC_DATA_T(DATA_MAX) )==""\n"
26R"==(#elif defined(IS_MUL) )==""\n"
27R"==(#define INIT_ACC TO_DEF_ACC_DATA_T(DATA_ONE) )==""\n"
28R"==(#else )==""\n"
29R"==(#define INIT_ACC TO_DEF_ACC_DATA_T(DATA_ZERO) )==""\n"
30R"==(#endif )==""\n"
31R"==(#if defined(IS_MAX) )==""\n"
32R"==(#if defined(SRC_DT_S8) || defined(SRC_DT_U8) )==""\n"
33R"==(#define ACCUMULATE(x, y) max(x, y) )==""\n"
34R"==(#else )==""\n"
35R"==(#define ACCUMULATE(x, y) fmax(x, y) )==""\n"
36R"==(#endif )==""\n"
37R"==(#elif defined(IS_MIN) )==""\n"
38R"==(#if defined(SRC_DT_S8) || defined(SRC_DT_U8) )==""\n"
39R"==(#define ACCUMULATE(x, y) min(x, y) )==""\n"
40R"==(#else )==""\n"
41R"==(#define ACCUMULATE(x, y) fmin(x, y) )==""\n"
42R"==(#endif )==""\n"
43R"==(#elif defined(IS_MEAN) || defined(IS_SUM) )==""\n"
44R"==(#define ACCUMULATE(x, y) (x + y) )==""\n"
45R"==(#elif defined(IS_MUL) )==""\n"
46R"==(#define ACCUMULATE(x, y) (x * y) )==""\n"
47R"==(#else )==""\n"
48R"==(#define ACCUMULATE(x, y) (x + pow(fabs(y), POWER)) )==""\n"
49R"==(#endif )==""\n"
50R"==(#if defined(IS_MEAN) )==""\n"
51R"==(#define FINALIZE(x) (x / DIV) )==""\n"
52R"==(#elif defined(IS_LP_MAX) )==""\n"
53R"==(#define FINALIZE(x) rootn(fmax(x, EPS), POWER) )==""\n"
54R"==(#elif defined(IS_LP_SUM) )==""\n"
55R"==(#define FINALIZE(x) rootn(x + EPS, POWER) )==""\n"
56R"==(#elif defined(IS_P_MAX) )==""\n"
57R"==(#define FINALIZE(x) fmax(x, EPS) )==""\n"
58R"==(#elif defined(IS_P_SUM) )==""\n"
59R"==(#define FINALIZE(x) (x + EPS) )==""\n"
60R"==(#else )==""\n"
61R"==(#define FINALIZE(x) (x) )==""\n"
62R"==(#endif )==""\n"
63R"==(#if NDIMS == 6 )==""\n"
64R"==(#define _SRC_OFF(x0, x1, x2, x3, x4, x5) OFF_MD(SRC, x0, x1, x2, x3, x4, x5) )==""\n"
65R"==(#define _DST_OFF(x0, x1, x2, x3, x4, x5) OFF_MD(DST, x0, x1, x2, x3, x4, x5) )==""\n"
66R"==(#elif NDIMS == 1 )==""\n"
67R"==(#define _SRC_OFF(x0, x1, x2, x3, x4, x5) (x0) )==""\n"
68R"==(#define _DST_OFF(x0, x1, x2, x3, x4, x5) (x0) )==""\n"
69R"==(#else )==""\n"
70R"==(#define _SRC_OFF(x0, x1, ignore, x3, x4, x5) SRC_OFF(x0, x1, x3, x4, x5) )==""\n"
71R"==(#define _DST_OFF(x0, x1, ignore, x3, x4, x5) DST_OFF(x0, x1, x3, x4, x5) )==""\n"
72R"==(#endif )==""\n"
73R"==(#if NDIMS == 6 )==""\n"
74R"==(#define ITERATE_OVER_REDUCTION_D2 \ )==""\n"
75R"==(for_(int d2_off = 0; d2_off < REDUCTION_D2; d2_off++) )==""\n"
76R"==(#define D2_OFF d2_off )==""\n"
77R"==(#else )==""\n"
78R"==(#define ITERATE_OVER_REDUCTION_D2 )==""\n"
79R"==(#define D2_OFF 0 )==""\n"
80R"==(#endif )==""\n"
81R"==(#define _DST_OFF_MODULO_DIM(x0, x1, x2, x3, x4, x5) \ )==""\n"
82R"==(({ \ )==""\n"
83R"==(int ret_val; \ )==""\n"
84R"==(if (NDIMS == 1) \ )==""\n"
85R"==(ret_val = _DST_OFF(x0 % DST_D0, 0, 0, 0, 0, 0); \ )==""\n"
86R"==(else if (NDIMS == 2) \ )==""\n"
87R"==(ret_val = _DST_OFF(x0 % DST_D0, x1 % DST_D1, 0, 0, 0, 0); \ )==""\n"
88R"==(else if (NDIMS == 3) \ )==""\n"
89R"==(ret_val = _DST_OFF( \ )==""\n"
90R"==(x0 % DST_D0, x1 % DST_D1, 0, 0, 0, x5 % DST_D2); \ )==""\n"
91R"==(else if (NDIMS == 4) \ )==""\n"
92R"==(ret_val = _DST_OFF( \ )==""\n"
93R"==(x0 % DST_D0, x1 % DST_D1, 0, 0, x4 % DST_D2, x5 % DST_D3); \ )==""\n"
94R"==(else if (NDIMS == 5) \ )==""\n"
95R"==(ret_val = _DST_OFF(x0 % DST_D0, x1 % DST_D1, 0, x3 % DST_D2, \ )==""\n"
96R"==(x4 % DST_D3, x5 % DST_D4); \ )==""\n"
97R"==(else \ )==""\n"
98R"==(ret_val = _DST_OFF(x0 % DST_D0, x1 % DST_D1, x2 % DST_D2, \ )==""\n"
99R"==(x3 % DST_D3, x4 % DST_D4, x5 % DST_D5); \ )==""\n"
100R"==(ret_val; \ )==""\n"
101R"==(}) )==""\n"
102R"==(__kernel void ref_reduce( )==""\n"
103R"==(__global SRC_DATA_T *src, __global DST_DATA_T *dst POST_OP_ARGS) { )==""\n"
104R"==(int d0 = GWS_GET_D0(); )==""\n"
105R"==(int d1 = GWS_GET_D1(); )==""\n"
106R"==(int d2 = GWS_GET_D2(); )==""\n"
107R"==(int d3 = GWS_GET_D3(); )==""\n"
108R"==(int d4 = GWS_GET_D4(); )==""\n"
109R"==(int d5 = GWS_GET_D5(); )==""\n"
110R"==(DEF_ACC_DATA_T acc = INIT_ACC; )==""\n"
111R"==(for_(int d0_off = 0; d0_off < REDUCTION_D0; d0_off++) )==""\n"
112R"==(for_(int d1_off = 0; d1_off < REDUCTION_D1; d1_off++) )==""\n"
113R"==(ITERATE_OVER_REDUCTION_D2 )==""\n"
114R"==(for_(int d3_off = 0; d3_off < REDUCTION_D3; d3_off++) )==""\n"
115R"==(for_(int d4_off = 0; d4_off < REDUCTION_D4; d4_off++) )==""\n"
116R"==(for_(int d5_off = 0; d5_off < REDUCTION_D5; d5_off++) )==""\n"
117R"==({ )==""\n"
118R"==(const int src_off = _SRC_OFF(d0 + d0_off, d1 + d1_off, d2 + D2_OFF, )==""\n"
119R"==(d3 + d3_off, d4 + d4_off, d5 + d5_off); )==""\n"
120R"==(acc = ACCUMULATE(acc, TO_DEF_ACC_DATA_T(src[src_off])); )==""\n"
121R"==(} )==""\n"
122R"==(float res = convert_float(acc); )==""\n"
123R"==(res = FINALIZE(res); )==""\n"
124R"==(const int dst_off = _DST_OFF_MODULO_DIM(d0, d1, d2, d3, d4, d5); )==""\n"
125R"==(const int dst_off_pd = _DST_OFF(d0, d1, d2, d3, d4, d5); )==""\n"
126R"==(float dst_val; )==""\n"
127R"==(#if WITH_SUM )==""\n"
128R"==(dst_val = DST_TO_REF(dst[dst_off]); )==""\n"
129R"==(#endif )==""\n"
130R"==(#if NDIMS == 4 )==""\n"
131R"==(#if REDUCTION_D1 != 1 )==""\n"
132R"==(d1 = 0; )==""\n"
133R"==(d2 = d4; )==""\n"
134R"==(d3 = d5; )==""\n"
135R"==(#elif REDUCTION_D4 != 1 )==""\n"
136R"==(d2 = 0; )==""\n"
137R"==(d3 = d5; )==""\n"
138R"==(#elif REDUCTION_D5 != 1 )==""\n"
139R"==(d2 = d4; )==""\n"
140R"==(d3 = 0; )==""\n"
141R"==(#endif )==""\n"
142R"==(APPLY_POST_OPS_SERIAL( )==""\n"
143R"==(res, float, dst_val, float, d0, 1, d1, 1, d2, 1, d3, 1, 0, 1, 0, 1); )==""\n"
144R"==(#elif NDIMS == 5 )==""\n"
145R"==(#if REDUCTION_D1 != 1 )==""\n"
146R"==(d1 = 0; )==""\n"
147R"==(#elif REDUCTION_D5 != 1 )==""\n"
148R"==(d5 = 0; )==""\n"
149R"==(#endif )==""\n"
150R"==(APPLY_POST_OPS_SERIAL(res, float, dst_val, float, d0, 1, d1, 1, d3, 1, d4, )==""\n"
151R"==(1, d5, 1, 0, 1); )==""\n"
152R"==(#else )==""\n"
153R"==(APPLY_POST_OPS_SERIAL(res, float, dst_val, float, d0, 1, d1, 1, d2, 1, d3, )==""\n"
154R"==(1, d4, 1, d5, 1); )==""\n"
155R"==(#endif )==""\n"
156R"==(if (dst_off_pd != dst_off) res = 0.f; )==""\n"
157R"==(dst[dst_off_pd] = TO_DST(res); )==""\n"
158R"==(} )==""\n"
159R"==()==";
160}
161}
162}
163}