namespace dnnl {
namespace impl {
namespace gpu {
namespace ocl {
// OpenCL C source text for the reference shuffle kernel, stored as one
// NUL-terminated string assembled from adjacent raw string literals (one
// literal per kernel source line, each followed by an explicit "\n").
//
// What the embedded source contains:
//   * SRC_OFF / DST_OFF: six-coordinate offset macros that forward to
//     OFF_MD(SRC, ...) and OFF_MD(DST, ...).
//   * rev_transposed(a): returns
//     (a % TRANSPOSE_COL) * TRANSPOSE_ROW + a / TRANSPOSE_COL.
//   * ref_shuffle(src, dst): reads six coordinates via GWS_GET_D0..D5,
//     computes the source offset, replaces coordinate d[AXIS] with
//     rev_transposed(d[AXIS]), computes the destination offset and copies
//     one element: dst[dst_off] = src[src_off].
//
// DATA_T, AXIS, TRANSPOSE_ROW, TRANSPOSE_COL, SRC_OFFSET0, DST_OFFSET0,
// OFF_MD and GWS_GET_D* are not defined in this text; presumably they come
// from the included ocl headers and/or kernel build options -- confirm
// against the code that compiles this kernel.
const char *ref_shuffle_kernel = R"==(/******************************************************************************* )==""\n"
R"==(* Copyright 2019-2020 Intel Corporation )==""\n"
R"==(* )==""\n"
R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==""\n"
R"==(* you may not use this file except in compliance with the License. )==""\n"
R"==(* You may obtain a copy of the License at )==""\n"
R"==(* )==""\n"
R"==(* http: )==""\n"
R"==(* )==""\n"
R"==(* Unless required by applicable law or agreed to in writing, software )==""\n"
R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==""\n"
R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==""\n"
R"==(* See the License for the specific language governing permissions and )==""\n"
R"==(* limitations under the License. )==""\n"
R"==(*******************************************************************************/ )==""\n"
R"==(#define DT_UNDEF 1 )==""\n"
R"==(#include "gpu/ocl/ocl_types.h" )==""\n"
R"==(#include "gpu/ocl/ocl_math_utils.h" )==""\n"
R"==(#undef SRC_OFF )==""\n"
R"==(#undef DST_OFF )==""\n"
R"==(#define SRC_OFF(x0, x1, x2, x3, x4, x5) \ )==""\n"
R"==(OFF_MD(SRC, (x0), (x1), (x2), (x3), (x4), (x5)) )==""\n"
R"==(#define DST_OFF(x0, x1, x2, x3, x4, x5) \ )==""\n"
R"==(OFF_MD(DST, (x0), (x1), (x2), (x3), (x4), (x5)) )==""\n"
R"==(int rev_transposed(int a) { )==""\n"
R"==(return ((a % TRANSPOSE_COL) * TRANSPOSE_ROW + a / TRANSPOSE_COL); )==""\n"
R"==(} )==""\n"
R"==(__kernel void ref_shuffle(__global DATA_T *src, __global DATA_T *dst) { )==""\n"
R"==(src += SRC_OFFSET0; )==""\n"
R"==(dst += DST_OFFSET0; )==""\n"
// The coordinate array must hold six entries: indices 0..5 are written below
// and passed to SRC_OFF / DST_OFF. A five-element array would make the
// d[5] accesses out of bounds.
R"==(int d[6]; )==""\n"
R"==(d[0] = GWS_GET_D0(); )==""\n"
R"==(d[1] = GWS_GET_D1(); )==""\n"
R"==(d[2] = GWS_GET_D2(); )==""\n"
R"==(d[3] = GWS_GET_D3(); )==""\n"
R"==(d[4] = GWS_GET_D4(); )==""\n"
R"==(d[5] = GWS_GET_D5(); )==""\n"
// The source offset is taken before d[AXIS] is remapped; the destination
// offset is taken after, so only the AXIS coordinate differs between them.
R"==(const ulong src_off = SRC_OFF(d[0], d[1], d[2], d[3], d[4], d[5]); )==""\n"
R"==(d[AXIS] = rev_transposed(d[AXIS]); )==""\n"
R"==(const ulong dst_off = DST_OFF(d[0], d[1], d[2], d[3], d[4], d[5]); )==""\n"
R"==(dst[dst_off] = src[src_off]; )==""\n"
R"==(} )==""\n"
R"==()==";
} // namespace ocl
} // namespace gpu
} // namespace impl
} // namespace dnnl