namespace dnnl {
namespace impl {
namespace gpu {
namespace ocl {

// OpenCL C source text of the reference shuffle kernel, stored as a single
// NUL-terminated string assembled from adjacent raw string literals (one
// literal per kernel source line, each followed by an explicit "\n").
//
// The embedded source defines:
//   * rev_transposed(a): returns
//       (a % TRANSPOSE_COL) * TRANSPOSE_ROW + a / TRANSPOSE_COL.
//   * ref_shuffle(src, dst): reads six coordinates from GWS_GET_D0..D5,
//     computes the source offset from them, replaces coordinate AXIS with
//     rev_transposed(coordinate AXIS), computes the destination offset from
//     the updated coordinates, and copies one element from src to dst.
//
// DATA_T, AXIS, TRANSPOSE_ROW, TRANSPOSE_COL, SRC_OFFSET0, DST_OFFSET0,
// OFF_MD and GWS_GET_D* are not defined in this string.
// NOTE(review): presumably they come from the included ocl headers and/or
// the kernel build options -- confirm against the code that compiles this
// kernel.
//
// The comments in this file are host-side C++ comments placed between the
// string literals; they are not part of the kernel source.
const char *ref_shuffle_kernel = R"==(/******************************************************************************* )==" "\n"
R"==(* Copyright 2019-2020 Intel Corporation )==" "\n"
R"==(* )==" "\n"
R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==" "\n"
R"==(* you may not use this file except in compliance with the License. )==" "\n"
R"==(* You may obtain a copy of the License at )==" "\n"
R"==(* )==" "\n"
R"==(* http://www.apache.org/licenses/LICENSE-2.0 )==" "\n"
R"==(* )==" "\n"
R"==(* Unless required by applicable law or agreed to in writing, software )==" "\n"
R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==" "\n"
R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==" "\n"
R"==(* See the License for the specific language governing permissions and )==" "\n"
R"==(* limitations under the License. )==" "\n"
R"==(*******************************************************************************/ )==" "\n"
R"==(#define DT_UNDEF 1 )==" "\n"
R"==(#include "gpu/ocl/ocl_types.h" )==" "\n"
R"==(#include "gpu/ocl/ocl_math_utils.h" )==" "\n"
R"==(#undef SRC_OFF )==" "\n"
R"==(#undef DST_OFF )==" "\n"
// The backslash must be the last character before the newline for the
// macro definition to continue on the next line, so these two lines carry
// no trailing space inside the literal.
R"==(#define SRC_OFF(x0, x1, x2, x3, x4, x5) \)==" "\n"
R"==(OFF_MD(SRC, (x0), (x1), (x2), (x3), (x4), (x5)) )==" "\n"
R"==(#define DST_OFF(x0, x1, x2, x3, x4, x5) \)==" "\n"
R"==(OFF_MD(DST, (x0), (x1), (x2), (x3), (x4), (x5)) )==" "\n"
R"==(int rev_transposed(int a) { )==" "\n"
R"==(return ((a % TRANSPOSE_COL) * TRANSPOSE_ROW + a / TRANSPOSE_COL); )==" "\n"
R"==(} )==" "\n"
R"==(__kernel void ref_shuffle(__global DATA_T *src, __global DATA_T *dst) { )==" "\n"
R"==(src += SRC_OFFSET0; )==" "\n"
R"==(dst += DST_OFFSET0; )==" "\n"
// Six coordinates (indices 0..5) are stored and read below, so the array
// needs six elements; a five-element array made d[5] an out-of-bounds access.
R"==(int d[6]; )==" "\n"
R"==(d[0] = GWS_GET_D0(); )==" "\n"
R"==(d[1] = GWS_GET_D1(); )==" "\n"
R"==(d[2] = GWS_GET_D2(); )==" "\n"
R"==(d[3] = GWS_GET_D3(); )==" "\n"
R"==(d[4] = GWS_GET_D4(); )==" "\n"
R"==(d[5] = GWS_GET_D5(); )==" "\n"
// The source offset is taken before coordinate AXIS is remapped; the
// destination offset is taken after.
R"==(const ulong src_off = SRC_OFF(d[0], d[1], d[2], d[3], d[4], d[5]); )==" "\n"
R"==(d[AXIS] = rev_transposed(d[AXIS]); )==" "\n"
R"==(const ulong dst_off = DST_OFF(d[0], d[1], d[2], d[3], d[4], d[5]); )==" "\n"
R"==(dst[dst_off] = src[src_off]; )==" "\n"
R"==(} )==" "\n"
R"==()==" ;

} // namespace ocl
} // namespace gpu
} // namespace impl
} // namespace dnnl