// Embedded OpenCL C program text for the `gen9_gemm_copy` kernel, exposed as
// the C string dnnl::impl::gpu::ocl::gen9_gemm_copy_kernel.
// NOTE(review): the layout (one literal per kernel line, license URL cut to
// "http:") suggests this file is produced by a generator from a .cl source;
// if so, fix the kernel there rather than here -- TODO confirm.
1 | namespace dnnl { |
2 | namespace impl { |
3 | namespace gpu { |
4 | namespace ocl { |
// gen9_gemm_copy_kernel: NUL-terminated OpenCL C source text.
//
// Encoding: every kernel source line is its own raw string literal using the
// `==` delimiter, followed by an ordinary "\n" literal; the compiler
// concatenates all adjacent literals into a single string. The final
// `R"==()=="` piece is empty and contributes no characters.
//
// What the embedded kernel does (as written in the text below):
//   * Signature: gen9_gemm_copy(long m, long n, __global SRC_DATA_T *a,
//     long offseta, long lda, __global float *alpha, __global DATA_T *b,
//     long offsetb).
//   * idx = get_group_id(0) and idy = get_group_id(1) * COPY_UNROLL, i.e. the
//     indices are derived from the work-group id, not the work-item id.
//   * Source start: offseta += idy + idx * lda when USE_TRANS is defined,
//     otherwise offseta += idx + idy * lda.
//   * Destination start: offsetb += idy * m + idx * COPY_UNROLL.
//   * n is reduced by idy, then a loop of exactly COPY_UNROLL iterations
//     writes b[offsetb] on every iteration: ATTR_ALPHA * SRC_TO_REF(a[offseta])
//     while i < n, and DATA_ZERO once i >= n (the tail is zero-filled and `a`
//     is not read for those iterations).
//   * Per iteration offsetb advances by 1; offseta advances by 1 under
//     USE_TRANS and by lda otherwise.
//
// SRC_DATA_T, DATA_T, COPY_UNROLL, USE_TRANS, ATTR_ALPHA, SRC_TO_REF and
// DATA_ZERO are not defined in this text; presumably they come from the two
// headers it #includes or from kernel build options -- TODO confirm.
//
// NOTE(review): the `alpha` kernel argument is never referenced in the body;
// scaling goes through the ATTR_ALPHA macro instead.
// NOTE(review): `m` is only used to compute the destination offset; idx is
// not range-checked against it, so the launch geometry presumably guarantees
// a valid idx -- verify against the host-side dispatch.
// NOTE(review): idx/idy are `int` while m, n, lda and the offsets are `long`.
// `idx * lda`, `idy * lda` and `idy * m` are promoted to long, but whether
// `idx * COPY_UNROLL` is evaluated in int depends on how COPY_UNROLL is
// defined -- confirm problem sizes cannot overflow it.
5 | const char *gen9_gemm_copy_kernel = R"==(/******************************************************************************* )==" "\n" |
6 | R"==(* Copyright 2019-2022 Intel Corporation )==" "\n" |
7 | R"==(* )==" "\n" |
8 | R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==" "\n" |
9 | R"==(* you may not use this file except in compliance with the License. )==" "\n" |
10 | R"==(* You may obtain a copy of the License at )==" "\n" |
11 | R"==(* )==" "\n" |
12 | R"==(* http: )==" "\n" |
13 | R"==(* )==" "\n" |
14 | R"==(* Unless required by applicable law or agreed to in writing, software )==" "\n" |
15 | R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==" "\n" |
16 | R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==" "\n" |
17 | R"==(* See the License for the specific language governing permissions and )==" "\n" |
18 | R"==(* limitations under the License. )==" "\n" |
19 | R"==(*******************************************************************************/ )==" "\n" |
20 | R"==(#include "gpu/ocl/gemm/ocl_gemm_attrs.h" )==" "\n" |
21 | R"==(#include "gpu/ocl/ocl_types.h" )==" "\n" |
22 | R"==(__kernel void gen9_gemm_copy(long m, long n, __global SRC_DATA_T *a, )==" "\n" |
23 | R"==(long offseta, long lda, __global float *alpha, __global DATA_T *b, )==" "\n" |
24 | R"==(long offsetb) { )==" "\n" |
25 | R"==(int idx = get_group_id(0); )==" "\n" |
26 | R"==(int idy = get_group_id(1) * COPY_UNROLL; )==" "\n" |
27 | R"==(int i; )==" "\n" |
28 | R"==(#ifdef USE_TRANS )==" "\n" |
29 | R"==(offseta += (idy + idx * lda); )==" "\n" |
30 | R"==(#else )==" "\n" |
31 | R"==(offseta += (idx + idy * lda); )==" "\n" |
32 | R"==(#endif )==" "\n" |
33 | R"==(offsetb += (idy * m + idx * COPY_UNROLL); )==" "\n" |
34 | R"==(n -= idy; )==" "\n" |
35 | R"==(for (i = 0; i < COPY_UNROLL; i++) { )==" "\n" |
36 | R"==(b[offsetb] )==" "\n" |
37 | R"==(= (i < n) ? (ATTR_ALPHA * SRC_TO_REF(a[offseta])) : DATA_ZERO; )==" "\n" |
38 | R"==(#ifdef USE_TRANS )==" "\n" |
39 | R"==(offseta++; )==" "\n" |
40 | R"==(#else )==" "\n" |
41 | R"==(offseta += lda; )==" "\n" |
42 | R"==(#endif )==" "\n" |
43 | R"==(offsetb++; )==" "\n" |
44 | R"==(} )==" "\n" |
45 | R"==(} )==" "\n" |
46 | R"==()==" ; |
// The four braces below close namespaces ocl, gpu, impl and dnnl, in that order.
47 | } |
48 | } |
49 | } |
50 | } |