// Defines dnnl::impl::gpu::ocl::gen9_gemm_copy_kernel, a C string holding the
// OpenCL C source text of the `gen9_gemm_copy` kernel.
1namespace dnnl {
2namespace impl {
3namespace gpu {
4namespace ocl {
// OpenCL C source text of the `gen9_gemm_copy` kernel.
//
// Layout of the literal: the value is built from adjacent string literals.
// Each raw-string piece holds one line of kernel source (ending in a single
// space) and is followed by an ordinary newline literal, so the resulting text
// is newline-separated. The last raw-string piece is empty.
//
// What the embedded kernel text does for each work-group:
//   - idx = get_group_id(0); idy = get_group_id(1) * COPY_UNROLL.
//   - Start index into `a`: offseta + idy + idx * lda when USE_TRANS is
//     defined, otherwise offseta + idx + idy * lda.
//   - Start index into `b`: offsetb + idy * m + idx * COPY_UNROLL.
//   - It then stores COPY_UNROLL consecutive elements of `b`. Element i is
//     ATTR_ALPHA * SRC_TO_REF(a[...]) while i < n - idy, and DATA_ZERO
//     otherwise. Between elements the index into `a` advances by 1 when
//     USE_TRANS is defined, otherwise by lda.
//
// SRC_DATA_T, DATA_T, COPY_UNROLL, USE_TRANS, ATTR_ALPHA, SRC_TO_REF and
// DATA_ZERO are not defined in this text; presumably they come from the two
// included headers and/or kernel build options -- TODO confirm.
//
// NOTE(review): the `alpha` kernel argument is never referenced by name in the
// kernel body; presumably it is consumed through ATTR_ALPHA -- confirm in
// ocl_gemm_attrs.h.
// NOTE(review): the license URL line inside the string reads just `http:`;
// it looks like the remainder was dropped by line-comment stripping when this
// text was generated -- confirm against the original kernel source file.
5const char *gen9_gemm_copy_kernel = R"==(/******************************************************************************* )==""\n"
6R"==(* Copyright 2019-2022 Intel Corporation )==""\n"
7R"==(* )==""\n"
8R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==""\n"
9R"==(* you may not use this file except in compliance with the License. )==""\n"
10R"==(* You may obtain a copy of the License at )==""\n"
11R"==(* )==""\n"
12R"==(* http: )==""\n"
13R"==(* )==""\n"
14R"==(* Unless required by applicable law or agreed to in writing, software )==""\n"
15R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==""\n"
16R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==""\n"
17R"==(* See the License for the specific language governing permissions and )==""\n"
18R"==(* limitations under the License. )==""\n"
19R"==(*******************************************************************************/ )==""\n"
20R"==(#include "gpu/ocl/gemm/ocl_gemm_attrs.h" )==""\n"
21R"==(#include "gpu/ocl/ocl_types.h" )==""\n"
22R"==(__kernel void gen9_gemm_copy(long m, long n, __global SRC_DATA_T *a, )==""\n"
23R"==(long offseta, long lda, __global float *alpha, __global DATA_T *b, )==""\n"
24R"==(long offsetb) { )==""\n"
25R"==(int idx = get_group_id(0); )==""\n"
26R"==(int idy = get_group_id(1) * COPY_UNROLL; )==""\n"
27R"==(int i; )==""\n"
28R"==(#ifdef USE_TRANS )==""\n"
29R"==(offseta += (idy + idx * lda); )==""\n"
30R"==(#else )==""\n"
31R"==(offseta += (idx + idy * lda); )==""\n"
32R"==(#endif )==""\n"
33R"==(offsetb += (idy * m + idx * COPY_UNROLL); )==""\n"
34R"==(n -= idy; )==""\n"
35R"==(for (i = 0; i < COPY_UNROLL; i++) { )==""\n"
36R"==(b[offsetb] )==""\n"
37R"==(= (i < n) ? (ATTR_ALPHA * SRC_TO_REF(a[offseta])) : DATA_ZERO; )==""\n"
38R"==(#ifdef USE_TRANS )==""\n"
39R"==(offseta++; )==""\n"
40R"==(#else )==""\n"
41R"==(offseta += lda; )==""\n"
42R"==(#endif )==""\n"
43R"==(offsetb++; )==""\n"
44R"==(} )==""\n"
45R"==(} )==""\n"
46R"==()==";
47} // namespace ocl
48} // namespace gpu
49} // namespace impl
50} // namespace dnnl