1 | namespace dnnl { |
2 | namespace impl { |
3 | namespace gpu { |
4 | namespace ocl { |
// Embedded OpenCL C source text, assembled from adjacent raw string literals
// (delimiter "==") with an explicit "\n" appended after every piece.
// The text consists of a license banner, the include guard
// GPU_OCL_OCL_SCALES_H, and a single inline helper, block_read_scales().
//
// NOTE(review): the declarator below has no identifier ("const char * = ...").
// As written this does not compile; the variable name appears to have been
// lost and must be restored from whatever code refers to this string.
// NOTE(review): the license line containing only "http:" looks truncated
// (presumably the URL was stripped as a "//" comment) -- confirm upstream.
5 | const char * = R"==(/******************************************************************************* )==" "\n" |
6 | R"==(* Copyright 2022 Intel Corporation )==" "\n" |
7 | R"==(* )==" "\n" |
8 | R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==" "\n" |
9 | R"==(* you may not use this file except in compliance with the License. )==" "\n" |
10 | R"==(* You may obtain a copy of the License at )==" "\n" |
11 | R"==(* )==" "\n" |
12 | R"==(* http: )==" "\n" |
13 | R"==(* )==" "\n" |
14 | R"==(* Unless required by applicable law or agreed to in writing, software )==" "\n" |
15 | R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==" "\n" |
16 | R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==" "\n" |
17 | R"==(* See the License for the specific language governing permissions and )==" "\n" |
18 | R"==(* limitations under the License. )==" "\n" |
19 | R"==(*******************************************************************************/ )==" "\n" |
20 | R"==(#ifndef GPU_OCL_OCL_SCALES_H )==" "\n" |
21 | R"==(#define GPU_OCL_OCL_SCALES_H )==" "\n" |
// block_read_scales(data, idx, sg_local_id, runtime_scales):
// fills the four components of *data from runtime_scales, starting at
// element idx. OC and SUB_GROUP_SIZE are not defined in this text;
// presumably they are macros supplied when the kernel is built -- confirm.
22 | R"==(inline void block_read_scales(float4 *data, int idx, int sg_local_id, )==" "\n" |
23 | R"==(__global float *runtime_scales) { )==" "\n" |
// Fast path: when OC is strictly greater than idx + 4 * SUB_GROUP_SIZE, a
// single intel_sub_group_block_read4 at &runtime_scales[idx] is
// reinterpreted as float4.
24 | R"==(if (OC > idx + (SUB_GROUP_SIZE * 4)) { )==" "\n" |
25 | R"==(*data = as_float4(intel_sub_group_block_read4( )==" "\n" |
26 | R"==((__global uint *)&runtime_scales[idx])); )==" "\n" |
27 | R"==(} else { )==" "\n" |
// Tail path: local_dat starts zeroed. For each i in 0..3:
//  - if idx + (i + 1) * SUB_GROUP_SIZE <= OC, a one-element block read is
//    issued at offset idx + SUB_GROUP_SIZE * i;
//  - otherwise, if idx + i * SUB_GROUP_SIZE + sg_local_id < OC, a plain
//    indexed load at that position is done;
//  - otherwise local_dat[i] keeps its initial zero.
28 | R"==(float local_dat[4] = {}; )==" "\n" |
29 | R"==(for (int i = 0; i < 4; ++i) )==" "\n" |
30 | R"==(if (idx + ((i + 1) * SUB_GROUP_SIZE) <= OC) { )==" "\n" |
31 | R"==(local_dat[i] = as_float(intel_sub_group_block_read( )==" "\n" |
32 | R"==((__global uint *)&runtime_scales[idx )==" "\n" |
33 | R"==(+ (SUB_GROUP_SIZE * i)])); )==" "\n" |
34 | R"==(} else if (idx + (i * SUB_GROUP_SIZE) + sg_local_id < OC) { )==" "\n" |
35 | R"==(local_dat[i] = runtime_scales[idx + (SUB_GROUP_SIZE * i) )==" "\n" |
36 | R"==(+ sg_local_id]; )==" "\n" |
37 | R"==(} )==" "\n" |
// Copy the four gathered values into the s0..s3 components of *data.
38 | R"==((*data).s0 = local_dat[0]; )==" "\n" |
39 | R"==((*data).s1 = local_dat[1]; )==" "\n" |
40 | R"==((*data).s2 = local_dat[2]; )==" "\n" |
41 | R"==((*data).s3 = local_dat[3]; )==" "\n" |
42 | R"==(} )==" "\n" |
43 | R"==(} )==" "\n" |
44 | R"==(#endif )==" "\n" |
// Final empty raw string piece terminates the concatenation.
45 | R"==()==" ; |
46 | } |
47 | } |
48 | } |
49 | } |