1namespace dnnl {
2namespace impl {
3namespace gpu {
4namespace ocl {
5const char *gemm_post_ops_inner_product_kernel = R"==(/******************************************************************************* )==""\n"
6R"==(* Copyright 2019-2020 Intel Corporation )==""\n"
7R"==(* )==""\n"
8R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==""\n"
9R"==(* you may not use this file except in compliance with the License. )==""\n"
10R"==(* You may obtain a copy of the License at )==""\n"
11R"==(* )==""\n"
12R"==(* http: )==""\n"
13R"==(* )==""\n"
14R"==(* Unless required by applicable law or agreed to in writing, software )==""\n"
15R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==""\n"
16R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==""\n"
17R"==(* See the License for the specific language governing permissions and )==""\n"
18R"==(* limitations under the License. )==""\n"
19R"==(*******************************************************************************/ )==""\n"
20R"==(#include "gpu/ocl/ocl_post_ops.h" )==""\n"
21R"==(#include "gpu/ocl/ocl_types.h" )==""\n"
22R"==(#ifdef DST_DT_F32 )==""\n"
23R"==(#define DST_TO_ACC(x) (x) )==""\n"
24R"==(#else )==""\n"
25R"==(#define DST_TO_ACC(x) TO_DEF_ACC_DATA_T(x) )==""\n"
26R"==(#endif )==""\n"
27R"==(#ifdef BIAS_DT_F32 )==""\n"
28R"==(#define BIAS_TO_ACC(x) (x) )==""\n"
29R"==(#else )==""\n"
30R"==(#define BIAS_TO_ACC(x) TO_DEF_ACC_DATA_T(x) )==""\n"
31R"==(#endif )==""\n"
32R"==(#ifdef SRC_DT_F32 )==""\n"
33R"==(#define SRC_TO_ACC(x) (x) )==""\n"
34R"==(#else )==""\n"
35R"==(#define SRC_TO_ACC(x) TO_DEF_ACC_DATA_T(x) )==""\n"
36R"==(#endif )==""\n"
37R"==(__kernel void gemm_post_ops_inner_product(__global SRC_DATA_T *src, )==""\n"
38R"==(__global BIAS_DATA_T *bias, __global DST_DATA_T *dst POST_OP_ARGS, )==""\n"
39R"==(__global SPAD_DATA_T *scratchpad, global float *scales) { )==""\n"
40R"==(const size_t mb = get_global_id(0) / OC; )==""\n"
41R"==(const size_t oc = get_global_id(0) % OC; )==""\n"
42R"==(const size_t data_idx = mb * OC + oc; )==""\n"
43R"==(#if USE_TEMP_DST == 1 )==""\n"
44R"==(ACC_DATA_T acc = SRC_TO_ACC(scratchpad[data_idx]); )==""\n"
45R"==(#else )==""\n"
46R"==(ACC_DATA_T acc = SRC_TO_ACC(src[data_idx]); )==""\n"
47R"==(#endif )==""\n"
48R"==(#if WITH_BIAS == 1 )==""\n"
49R"==(acc += BIAS_TO_ACC(bias[oc]); )==""\n"
50R"==(#endif )==""\n"
51R"==(#if WITH_SCALES )==""\n"
52R"==(#if SCALES_COMMON )==""\n"
53R"==(const float scale = scales[0]; )==""\n"
54R"==(#elif SCALES_PER_OC )==""\n"
55R"==(const float scale = scales[oc]; )==""\n"
56R"==(#else )==""\n"
57R"==(#error "Unsupported scale type" )==""\n"
58R"==(#endif )==""\n"
59R"==(acc *= scale; )==""\n"
60R"==(#endif )==""\n"
61R"==(float sum_src; )==""\n"
62R"==(#if WITH_SUM )==""\n"
63R"==(sum_src = DST_TO_ACC(dst[data_idx]); )==""\n"
64R"==(#endif )==""\n"
65R"==(float accumulator = acc; )==""\n"
66R"==(APPLY_POST_OPS_SERIAL_BINARY_2D( )==""\n"
67R"==(accumulator, float, sum_src, float, mb, 1, oc, 1); )==""\n"
68R"==(dst[data_idx] = TO_DST(accumulator); )==""\n"
69R"==(} )==""\n"
70R"==()==";
71}
72}
73}
74}