// Embedded OpenCL C header text, exposed as dnnl::impl::gpu::ocl::ocl_scales_header.
//
// The string is assembled from adjacent raw string literals (delimiter ==),
// one per line of the embedded header, each followed by an explicit newline
// literal. Every embedded line therefore ends with a single space and a
// newline character. The text is runtime data: changing any character inside
// the literals changes the header text that this symbol carries.
//
// NOTE(review): the layout looks machine generated, presumably by a build
// step that converts a kernel header into a C++ string; confirm before
// editing by hand.
//
// Contents of the embedded header, in order:
//   1. An Apache 2.0 license banner inside a block comment.
//   2. An include guard named GPU_OCL_OCL_SCALES_H.
//   3. One inline function, block_read_scales.
//
// NOTE(review): the license line that reads only http: appears truncated;
// presumably the text after the double slash was dropped as if it were a
// line comment by whatever produced this string. It sits inside the embedded
// block comment, so it should not affect kernel compilation; confirm.
1namespace dnnl {
2namespace impl {
3namespace gpu {
4namespace ocl {
5const char *ocl_scales_header = R"==(/******************************************************************************* )==""\n"
6R"==(* Copyright 2022 Intel Corporation )==""\n"
7R"==(* )==""\n"
8R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==""\n"
9R"==(* you may not use this file except in compliance with the License. )==""\n"
10R"==(* You may obtain a copy of the License at )==""\n"
11R"==(* )==""\n"
12R"==(* http: )==""\n"
13R"==(* )==""\n"
14R"==(* Unless required by applicable law or agreed to in writing, software )==""\n"
15R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==""\n"
16R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==""\n"
17R"==(* See the License for the specific language governing permissions and )==""\n"
18R"==(* limitations under the License. )==""\n"
19R"==(*******************************************************************************/ )==""\n"
// Include guard of the embedded header.
20R"==(#ifndef GPU_OCL_OCL_SCALES_H )==""\n"
21R"==(#define GPU_OCL_OCL_SCALES_H )==""\n"
// block_read_scales(data, idx, sg_local_id, runtime_scales):
//   Fills the four components of *data with floats read from runtime_scales,
//   starting at element idx. Component i comes from the i-th group of
//   SUB_GROUP_SIZE elements after idx.
//   OC and SUB_GROUP_SIZE are not defined in this header; presumably they are
//   supplied as macros when the kernel is compiled (TODO confirm).
//   NOTE(review): sg_local_id is presumably the calling work-item index
//   within its sub-group; verify against callers.
22R"==(inline void block_read_scales(float4 *data, int idx, int sg_local_id, )==""\n"
23R"==(__global float *runtime_scales) { )==""\n"
// Fast path: OC is strictly greater than idx + 4 * SUB_GROUP_SIZE, so all
// four groups are fetched with one intel_sub_group_block_read4 call.
// NOTE(review): the comparison is strict, so an exact fit
// (OC == idx + 4 * SUB_GROUP_SIZE) takes the else branch, where every group
// passes the full-group test below; looks like only the code path differs.
24R"==(if (OC > idx + (SUB_GROUP_SIZE * 4)) { )==""\n"
25R"==(*data = as_float4(intel_sub_group_block_read4( )==""\n"
26R"==((__global uint *)&runtime_scales[idx])); )==""\n"
27R"==(} else { )==""\n"
// Tail path: local_dat starts zeroed, so any component that is not loaded
// below is written to *data as zero.
28R"==(float local_dat[4] = {}; )==""\n"
29R"==(for (int i = 0; i < 4; ++i) )==""\n"
// Group i lies entirely below OC: use a single-element sub-group block read.
30R"==(if (idx + ((i + 1) * SUB_GROUP_SIZE) <= OC) { )==""\n"
31R"==(local_dat[i] = as_float(intel_sub_group_block_read( )==""\n"
32R"==((__global uint *)&runtime_scales[idx )==""\n"
33R"==(+ (SUB_GROUP_SIZE * i)])); )==""\n"
// Group i is only partially below OC: load the element at offset sg_local_id
// within the group with a plain indexed read, but only if that index is
// below OC.
34R"==(} else if (idx + (i * SUB_GROUP_SIZE) + sg_local_id < OC) { )==""\n"
35R"==(local_dat[i] = runtime_scales[idx + (SUB_GROUP_SIZE * i) )==""\n"
36R"==(+ sg_local_id]; )==""\n"
37R"==(} )==""\n"
// Copy the per-group values into the output vector components s0 to s3.
38R"==((*data).s0 = local_dat[0]; )==""\n"
39R"==((*data).s1 = local_dat[1]; )==""\n"
40R"==((*data).s2 = local_dat[2]; )==""\n"
41R"==((*data).s3 = local_dat[3]; )==""\n"
42R"==(} )==""\n"
43R"==(} )==""\n"
44R"==(#endif )==""\n"
// Final empty raw string literal terminates the concatenation.
45R"==()==";
// The four closing braces below end namespaces ocl, gpu, impl and dnnl,
// in that order.
46}
47}
48}
49}