1namespace dnnl {
2namespace impl {
3namespace gpu {
4namespace ocl {
5const char *gen9_wino_conv_fwd_data_2x3_kernel = R"==(/******************************************************************************* )==""\n"
6R"==(* Copyright 2020 Intel Corporation )==""\n"
7R"==(* )==""\n"
8R"==(* Licensed under the Apache License, Version 2.0 (the "License"); )==""\n"
9R"==(* you may not use this file except in compliance with the License. )==""\n"
10R"==(* You may obtain a copy of the License at )==""\n"
11R"==(* )==""\n"
12R"==(* http: )==""\n"
13R"==(* )==""\n"
14R"==(* Unless required by applicable law or agreed to in writing, software )==""\n"
15R"==(* distributed under the License is distributed on an "AS IS" BASIS, )==""\n"
16R"==(* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. )==""\n"
17R"==(* See the License for the specific language governing permissions and )==""\n"
18R"==(* limitations under the License. )==""\n"
19R"==(*******************************************************************************/ )==""\n"
20R"==(#include "gpu/ocl/ocl_post_ops.h" )==""\n"
21R"==(#include "gpu/ocl/ocl_types.h" )==""\n"
22R"==(#define BLOCK_SIZE OC_BLOCK )==""\n"
23R"==(#define BLOCKED_DATA_T CONCAT2(DATA_T, BLOCK_SIZE) )==""\n"
24R"==(#define BLOCKED_READ(ptr) vload16(0, ptr) )==""\n"
25R"==(#define BLOCKED_WRITE(data, ptr) \ )==""\n"
26R"==(do { \ )==""\n"
27R"==(BLOCKED_DATA_T result = data; \ )==""\n"
28R"==(unroll_for(int _i = 0; _i < BLOCK_SIZE; _i++) { \ )==""\n"
29R"==((ptr)[_i] = result[_i]; \ )==""\n"
30R"==(} \ )==""\n"
31R"==(} while (0) )==""\n"
32R"==(#define VECT_SIZE 4 )==""\n"
33R"==(#define VECT_DATA_T CONCAT2(DATA_T, VECT_SIZE) )==""\n"
34R"==(#define AS_VECT_DATA_T AS_DATA4_T )==""\n"
35R"==(#define AS_VECT_BLOCK_DATA_T AS_BLOCK_DATA4_T )==""\n"
36R"==(#define OC_OUTER_BLOCK OC_BLOCK )==""\n"
37R"==(#define IC_OUTER_BLOCK IC_BLOCK )==""\n"
38R"==(#define WINO_D (WINO_M + WINO_R - 1) )==""\n"
39R"==(#define VW WINO_IW )==""\n"
40R"==(#define VH WINO_IH )==""\n"
41R"==(#define VC WINO_IC )==""\n"
42R"==(#define MW WINO_OW )==""\n"
43R"==(#define MH WINO_OH )==""\n"
44R"==(#define MC WINO_OC )==""\n"
45R"==(static inline int off_nCdhw16c( )==""\n"
46R"==(int n, int c, int d, int h, int w, int C, int D, int H, int W) { )==""\n"
47R"==(int off = 0; )==""\n"
48R"==(off += n * (C / 16) * D * H * W * 16; )==""\n"
49R"==(off += (c / 16) * D * H * W * 16; )==""\n"
50R"==(off += d * H * W * 16; )==""\n"
51R"==(off += h * W * 16; )==""\n"
52R"==(off += w * 16; )==""\n"
53R"==(off += c % 16; )==""\n"
54R"==(return off; )==""\n"
55R"==(} )==""\n"
56R"==(static inline int off_NCdhw16n16c( )==""\n"
57R"==(int n, int c, int d, int h, int w, int C, int D, int H, int W) { )==""\n"
58R"==(int off = 0; )==""\n"
59R"==(off += (n / 16) * (C / 16) * D * H * W * 16 * 16; )==""\n"
60R"==(off += (c / 16) * D * H * W * 16 * 16; )==""\n"
61R"==(off += d * H * W * 16 * 16; )==""\n"
62R"==(off += h * W * 16 * 16; )==""\n"
63R"==(off += w * 16 * 16; )==""\n"
64R"==(off += (n % 16) * 16; )==""\n"
65R"==(off += (c % 16); )==""\n"
66R"==(return off; )==""\n"
67R"==(} )==""\n"
68R"==(static inline int off_gIOdhw16i16o(int g, int o, int i, int d, int h, int w, )==""\n"
69R"==(int O, int I, int D, int H, int W) { )==""\n"
70R"==(int off = 0; )==""\n"
71R"==(off += g * (I / 16) * (O / 16) * D * H * W * 16 * 16; )==""\n"
72R"==(off += (i / 16) * (O / 16) * D * H * W * 16 * 16; )==""\n"
73R"==(off += (o / 16) * D * H * W * 16 * 16; )==""\n"
74R"==(off += d * H * W * 16 * 16; )==""\n"
75R"==(off += h * W * 16 * 16; )==""\n"
76R"==(off += w * 16 * 16; )==""\n"
77R"==(off += (i % 16) * 16; )==""\n"
78R"==(off += (o % 16); )==""\n"
79R"==(return off; )==""\n"
80R"==(} )==""\n"
81R"==(static inline int src_off(int n, int c, int d, int h, int w) { )==""\n"
82R"==(if (SRC_W16C) return off_nCdhw16c(n, c, d, h, w, G * IC, 1, IH, IW); )==""\n"
83R"==(if (SRC_16N16C) return off_NCdhw16n16c(n, c, d, h, w, G * IC, 1, IH, IW); )==""\n"
84R"==(return 0; )==""\n"
85R"==(} )==""\n"
86R"==(static inline int wei_off(int g, int o, int i, int d, int h, int w) { )==""\n"
87R"==(return off_gIOdhw16i16o(g, o, i, d, h, w, OC, IC, 1, KH, KW); )==""\n"
88R"==(} )==""\n"
89R"==(static inline int U_off(int o, int i, int h, int z) { )==""\n"
90R"==(int off = z * KH * WINO_IC * WINO_OC; )==""\n"
91R"==(off += h * WINO_IC * WINO_OC; )==""\n"
92R"==(off += i * WINO_OC; )==""\n"
93R"==(off += o; )==""\n"
94R"==(return off; )==""\n"
95R"==(} )==""\n"
96R"==(static inline int V_off(int n, int i, int h, int w, int z) { )==""\n"
97R"==(int off = n * WINO_D * VW * VH * VC; )==""\n"
98R"==(off += z * VW * VH * VC; )==""\n"
99R"==(off += w * VH * VC; )==""\n"
100R"==(off += (h + PH) * VC; )==""\n"
101R"==(off += i; )==""\n"
102R"==(return off; )==""\n"
103R"==(} )==""\n"
104R"==(static inline int M_off(int n, int o, int h, int w, int z) { )==""\n"
105R"==(int off = n * WINO_D * MW * MH * MC; )==""\n"
106R"==(off += z * MW * MH * MC; )==""\n"
107R"==(off += w * MH * MC; )==""\n"
108R"==(off += h * MC; )==""\n"
109R"==(off += o; )==""\n"
110R"==(return off; )==""\n"
111R"==(} )==""\n"
112R"==(static inline int dst_off(int n, int c, int d, int h, int w) { )==""\n"
113R"==(if (DST_W16C) return off_nCdhw16c(n, c, d, h, w, G * OC, 1, OH, OW); )==""\n"
114R"==(if (DST_16N16C) return off_NCdhw16n16c(n, c, d, h, w, G * OC, 1, OH, OW); )==""\n"
115R"==(return 0; )==""\n"
116R"==(} )==""\n"
117R"==(__kernel void gen9_wino_wei_transform_2x3( )==""\n"
118R"==(__global DATA_T *U, const __global DATA_T *weights) { )==""\n"
119R"==(const uint weights_tile_width = WINO_M; )==""\n"
120R"==(const uint weights_tile_height = 1; )==""\n"
121R"==(const uint in_kw = get_global_id(0) * weights_tile_width; )==""\n"
122R"==(const uint in_kh = get_global_id(1) * weights_tile_height; )==""\n"
123R"==(const uint U_tile_width = WINO_D; )==""\n"
124R"==(const uint U_tile_height = 1; )==""\n"
125R"==(const uint out_kw = get_global_id(0) * U_tile_width; )==""\n"
126R"==(const uint out_kh = get_global_id(1) * U_tile_height; )==""\n"
127R"==(const uint ic = get_global_id(2) % WINO_IC; )==""\n"
128R"==(const uint oc = get_global_id(2) / WINO_IC; )==""\n"
129R"==(uint in_idx = wei_off(0, oc, ic, 0, in_kh, in_kw); )==""\n"
130R"==(bool is_valid = ic < IC || oc < OC; )==""\n"
131R"==(VECT_DATA_T tile; )==""\n"
132R"==(tile.x = is_valid ? weights[in_idx] : 0; )==""\n"
133R"==(in_idx += wei_off(0, 0, 0, 0, 0, 1); )==""\n"
134R"==(tile.y = is_valid ? weights[in_idx] : 0; )==""\n"
135R"==(in_idx += wei_off(0, 0, 0, 0, 0, 1); )==""\n"
136R"==(tile.z = is_valid ? weights[in_idx] : 0; )==""\n"
137R"==(uint out_idx = U_off(oc, ic, out_kh, out_kw); )==""\n"
138R"==(U[out_idx] = tile.x; )==""\n"
139R"==(out_idx += U_off(0, 0, 0, 1); )==""\n"
140R"==(U[out_idx] = (tile.x + tile.y + tile.z) / 2; )==""\n"
141R"==(out_idx += U_off(0, 0, 0, 1); )==""\n"
142R"==(U[out_idx] = (tile.x - tile.y + tile.z) / 2; )==""\n"
143R"==(out_idx += U_off(0, 0, 0, 1); )==""\n"
144R"==(U[out_idx] = tile.z; )==""\n"
145R"==(} )==""\n"
146R"==(__kernel void gen9_wino_src_transform_2x3( )==""\n"
147R"==(__global DATA_T *V, const __global DATA_T *src) { )==""\n"
148R"==(const uint tile_id_x = get_global_id(0); )==""\n"
149R"==(const uint tile_id_y = get_global_id(1); )==""\n"
150R"==(const uint stride_x = WINO_M; )==""\n"
151R"==(const uint stride_y = 1; )==""\n"
152R"==(const uint iw = tile_id_x * stride_x - PW; )==""\n"
153R"==(const uint ih = tile_id_y * stride_y - PH; )==""\n"
154R"==(const uint ic = (get_global_id(2) % (WINO_IC / IC_BLOCK)) * IC_BLOCK; )==""\n"
155R"==(const uint n = get_global_id(2) / (WINO_IC / IC_BLOCK); )==""\n"
156R"==(const bool w0 = iw < 0 || iw >= IW; )==""\n"
157R"==(const bool w1 = iw + 1 < 0 || iw + 1 >= IW; )==""\n"
158R"==(const bool w2 = iw + 2 < 0 || iw + 2 >= IW; )==""\n"
159R"==(const bool w3 = iw + 3 < 0 || iw + 3 >= IW; )==""\n"
160R"==(const bool h0 = ih < 0 || ih >= IH || ic > IC; )==""\n"
161R"==(BLOCKED_DATA_T d0, d1, d2, d3; )==""\n"
162R"==(int in_idx = src_off(n, ic, 0, ih, iw); )==""\n"
163R"==(d0 = (h0 || w0) ? 0 : BLOCKED_READ(&src[in_idx]); )==""\n"
164R"==(in_idx += src_off(0, 0, 0, 0, 1); )==""\n"
165R"==(d1 = (h0 || w1) ? 0 : BLOCKED_READ(&src[in_idx]); )==""\n"
166R"==(in_idx += src_off(0, 0, 0, 0, 1); )==""\n"
167R"==(d2 = (h0 || w2) ? 0 : BLOCKED_READ(&src[in_idx]); )==""\n"
168R"==(in_idx += src_off(0, 0, 0, 0, 1); )==""\n"
169R"==(d3 = (h0 || w3) ? 0 : BLOCKED_READ(&src[in_idx]); )==""\n"
170R"==(int out_idx = V_off(n, ic, ih, tile_id_x, 0); )==""\n"
171R"==(BLOCKED_WRITE(d0 - d2, &V[out_idx]); )==""\n"
172R"==(out_idx += V_off(0, 0, -PH, 0, 1); )==""\n"
173R"==(BLOCKED_WRITE(d1 + d2, &V[out_idx]); )==""\n"
174R"==(out_idx += V_off(0, 0, -PH, 0, 1); )==""\n"
175R"==(BLOCKED_WRITE(-d1 + d2, &V[out_idx]); )==""\n"
176R"==(out_idx += V_off(0, 0, -PH, 0, 1); )==""\n"
177R"==(BLOCKED_WRITE(d1 - d3, &V[out_idx]); )==""\n"
178R"==(} )==""\n"
179R"==(__kernel void gen9_wino_dst_transform_2x3(__global DATA_T *dst, )==""\n"
180R"==(const __global DATA_T *M, const __global DATA_T *bias POST_OP_ARGS) { )==""\n"
181R"==(const uint tile_id_x = get_global_id(0); )==""\n"
182R"==(const uint tile_id_y = get_global_id(1); )==""\n"
183R"==(const uint dst_tile_width_x = WINO_M; )==""\n"
184R"==(const uint dst_tile_width_y = 1; )==""\n"
185R"==(const uint ow = tile_id_x * dst_tile_width_x; )==""\n"
186R"==(const uint oh = tile_id_y * dst_tile_width_y; )==""\n"
187R"==(const uint oc = (get_global_id(2) % (OC / OC_BLOCK)) * OC_BLOCK; )==""\n"
188R"==(const uint n = get_global_id(2) / (OC / OC_BLOCK); )==""\n"
189R"==(BLOCKED_DATA_T m0, m1, m2, m3; )==""\n"
190R"==(int M_idx = M_off(n, oc, tile_id_y, tile_id_x, 0); )==""\n"
191R"==(m0 = BLOCKED_READ(&M[M_idx]); )==""\n"
192R"==(M_idx += M_off(0, 0, 0, 0, 1); )==""\n"
193R"==(m1 = BLOCKED_READ(&M[M_idx]); )==""\n"
194R"==(M_idx += M_off(0, 0, 0, 0, 1); )==""\n"
195R"==(m2 = BLOCKED_READ(&M[M_idx]); )==""\n"
196R"==(M_idx += M_off(0, 0, 0, 0, 1); )==""\n"
197R"==(m3 = BLOCKED_READ(&M[M_idx]); )==""\n"
198R"==(BLOCKED_DATA_T C1 = m0 + m1 + m2; )==""\n"
199R"==(BLOCKED_DATA_T C2 = m1 - m2 - m3; )==""\n"
200R"==(if (WITH_BIAS || WITH_POST_OP) { )==""\n"
201R"==(const int c_size = WINO_M * OC_BLOCK; )==""\n"
202R"==(DATA_T C[c_size]; )==""\n"
203R"==(BLOCKED_WRITE(C1, &C[0]); )==""\n"
204R"==(BLOCKED_WRITE(C2, &C[OC_BLOCK]); )==""\n"
205R"==(if (WITH_BIAS) { )==""\n"
206R"==(for (int oc_outer = 0; oc_outer < OC_BLOCK; oc_outer++) { )==""\n"
207R"==(for (int ow_block = 0; ow_block < WINO_M; ow_block++) { )==""\n"
208R"==(const int c_off = ow_block * OC_BLOCK + oc_outer; )==""\n"
209R"==(const int bc_off = oc + oc_outer; )==""\n"
210R"==(C[c_off] += (OC_WO_PADDING % OC_BLOCK == 0 )==""\n"
211R"==(|| bc_off < OC_WO_PADDING) )==""\n"
212R"==(? bias[bc_off] )==""\n"
213R"==(: DATA_ZERO; )==""\n"
214R"==(} )==""\n"
215R"==(} )==""\n"
216R"==(} )==""\n"
217R"==(DATA_T S[c_size]; )==""\n"
218R"==(if (WITH_SUM) { )==""\n"
219R"==(BLOCKED_DATA_T S1, S2; )==""\n"
220R"==(int dst_idx = dst_off(n, oc, 0, oh, ow); )==""\n"
221R"==(S1 = BLOCKED_READ(&dst[dst_idx]); )==""\n"
222R"==(if (OW % WINO_M == 0 || ow < OW - 1) { )==""\n"
223R"==(dst_idx += dst_off(0, 0, 0, 0, 1); )==""\n"
224R"==(S2 = BLOCKED_READ(&dst[dst_idx]); )==""\n"
225R"==(} else { )==""\n"
226R"==(S2 = 0; )==""\n"
227R"==(} )==""\n"
228R"==(BLOCKED_WRITE(S1, &S[0]); )==""\n"
229R"==(BLOCKED_WRITE(S2, &S[OC_BLOCK]); )==""\n"
230R"==(} )==""\n"
231R"==(for (int didx = 0; didx < c_size; ++didx) { )==""\n"
232R"==(float accum = CONVERT_FLOAT_T(C[didx]); )==""\n"
233R"==(float sum = CONVERT_FLOAT_T(S[didx]); )==""\n"
234R"==(int po_oc = oc + c_size % OC_BLOCK; )==""\n"
235R"==(APPLY_POST_OPS_SERIAL_BINARY_2D( )==""\n"
236R"==(C, DATA_T, S, DATA_T, n, 1, po_oc, 1); )==""\n"
237R"==(C[didx] = TO_DATA_T(accum); )==""\n"
238R"==(} )==""\n"
239R"==(C1 = BLOCKED_READ(&C[0]); )==""\n"
240R"==(C2 = BLOCKED_READ(&C[OC_BLOCK]); )==""\n"
241R"==(} )==""\n"
242R"==(int dst_idx = dst_off(n, oc, 0, oh, ow); )==""\n"
243R"==(BLOCKED_WRITE(C1, &dst[dst_idx]); )==""\n"
244R"==(if (OW % WINO_M == 0 || ow < OW - 1) { )==""\n"
245R"==(dst_idx += dst_off(0, 0, 0, 0, 1); )==""\n"
246R"==(BLOCKED_WRITE(C2, &dst[dst_idx]); )==""\n"
247R"==(} )==""\n"
248R"==(} )==""\n"
249R"==(__attribute__((reqd_work_group_size(8, 1, 1))) __kernel void )==""\n"
250R"==(gen9_wino_conv_fwd_2x3(__global DATA_T *M, const __global DATA_T *V, )==""\n"
251R"==(const __global DATA_T *U_param) { )==""\n"
252R"==(const int VH_SIZE_VECT = V_off(0, 0, 1 - PH, 0, 0) / VECT_SIZE; )==""\n"
253R"==(const int MH_SIZE_VECT = M_off(0, 0, 1, 0, 0) / VECT_SIZE; )==""\n"
254R"==(const int U_IC_SIZE_VECT = U_off(0, 1, 0, 0) / VECT_SIZE; )==""\n"
255R"==(const int group_x = get_group_id(0); )==""\n"
256R"==(const int group_y = get_group_id(1); )==""\n"
257R"==(const int group_z = get_group_id(2); )==""\n"
258R"==(const int local_x = get_local_id(0); )==""\n"
259R"==(const int local_y = get_local_id(1); )==""\n"
260R"==(const int local_z = get_local_id(2); )==""\n"
261R"==(const int no_of_tiles_x = MW; )==""\n"
262R"==(const int no_of_tiles_y = MH; )==""\n"
263R"==(const int ow = (group_y * OH_BLOCK) / no_of_tiles_y; )==""\n"
264R"==(const int oh = (group_y * OH_BLOCK) % no_of_tiles_y; )==""\n"
265R"==(const int oc = group_x * WINO_OC_BLOCK )==""\n"
266R"==(+ local_x * VECT_SIZE; )==""\n"
267R"==(const int n = group_z / WINO_D; )==""\n"
268R"==(const int tile_w_offset = group_z % WINO_D; )==""\n"
269R"==(const int ih = oh - PH; )==""\n"
270R"==(const int iw = ow; )==""\n"
271R"==(const int ic = local_x * VECT_SIZE; )==""\n"
272R"==(VECT_DATA_T M0 = (VECT_DATA_T)(0.f); )==""\n"
273R"==(VECT_DATA_T M1 = (VECT_DATA_T)(0.f); )==""\n"
274R"==(VECT_DATA_T M2 = (VECT_DATA_T)(0.f); )==""\n"
275R"==(VECT_DATA_T M3 = (VECT_DATA_T)(0.f); )==""\n"
276R"==(VECT_DATA_T M4 = (VECT_DATA_T)(0.f); )==""\n"
277R"==(VECT_DATA_T M5 = (VECT_DATA_T)(0.f); )==""\n"
278R"==(VECT_DATA_T M6 = (VECT_DATA_T)(0.f); )==""\n"
279R"==(VECT_DATA_T M7 = (VECT_DATA_T)(0.f); )==""\n"
280R"==(const int M_idx = M_off(n, oc, oh, ow, tile_w_offset); )==""\n"
281R"==(__global VECT_DATA_T *dst = (__global VECT_DATA_T *)(M + M_idx); )==""\n"
282R"==(const int V_idx = V_off(n, ic, ih, iw, tile_w_offset); )==""\n"
283R"==(const __global VECT_DATA_T *V_tile = (__global VECT_DATA_T *)(V + V_idx); )==""\n"
284R"==(const int U_idx = U_off(oc, 0, 0, tile_w_offset); )==""\n"
285R"==(const __global VECT_DATA_T *U_tile )==""\n"
286R"==(= (__global VECT_DATA_T *)(U_param + U_idx); )==""\n"
287R"==(VECT_DATA_T a; )==""\n"
288R"==(for_(int kh = 0; kh < KH; kh++) )==""\n"
289R"==(for (int ic_idx = 0; ic_idx < WINO_IC; ic_idx += WINO_IC_BLOCK) { )==""\n"
290R"==(const VECT_DATA_T V0 = V_tile[0 * VH_SIZE_VECT]; )==""\n"
291R"==(const VECT_DATA_T V1 = V_tile[1 * VH_SIZE_VECT]; )==""\n"
292R"==(const VECT_DATA_T V2 = V_tile[2 * VH_SIZE_VECT]; )==""\n"
293R"==(const VECT_DATA_T V3 = V_tile[3 * VH_SIZE_VECT]; )==""\n"
294R"==(const VECT_DATA_T V4 = V_tile[4 * VH_SIZE_VECT]; )==""\n"
295R"==(const VECT_DATA_T V5 = V_tile[5 * VH_SIZE_VECT]; )==""\n"
296R"==(const VECT_DATA_T V6 = V_tile[6 * VH_SIZE_VECT]; )==""\n"
297R"==(const VECT_DATA_T V7 = V_tile[7 * VH_SIZE_VECT]; )==""\n"
298R"==(#define DOT_PRODUCT(_i, _j) \ )==""\n"
299R"==(do { \ )==""\n"
300R"==(a = AS_VECT_DATA_T( \ )==""\n"
301R"==(intel_sub_group_shuffle(AS_VECT_BLOCK_DATA_T(V##_i), _j)); \ )==""\n"
302R"==(M##_i = mad(a.x, U0, mad(a.y, U1, mad(a.z, U2, mad(a.w, U3, M##_i)))); \ )==""\n"
303R"==(} while (0) )==""\n"
304R"==(unroll_for(int j = 0; j < WINO_IC_BLOCK / VECT_SIZE; j++) { )==""\n"
305R"==(const VECT_DATA_T U0 = U_tile[0]; )==""\n"
306R"==(U_tile += U_IC_SIZE_VECT; )==""\n"
307R"==(const VECT_DATA_T U1 = U_tile[0]; )==""\n"
308R"==(U_tile += U_IC_SIZE_VECT; )==""\n"
309R"==(const VECT_DATA_T U2 = U_tile[0]; )==""\n"
310R"==(U_tile += U_IC_SIZE_VECT; )==""\n"
311R"==(const VECT_DATA_T U3 = U_tile[0]; )==""\n"
312R"==(U_tile += U_IC_SIZE_VECT; )==""\n"
313R"==(DOT_PRODUCT(0, j); )==""\n"
314R"==(DOT_PRODUCT(1, j); )==""\n"
315R"==(DOT_PRODUCT(2, j); )==""\n"
316R"==(DOT_PRODUCT(3, j); )==""\n"
317R"==(DOT_PRODUCT(4, j); )==""\n"
318R"==(DOT_PRODUCT(5, j); )==""\n"
319R"==(DOT_PRODUCT(6, j); )==""\n"
320R"==(DOT_PRODUCT(7, j); )==""\n"
321R"==(} )==""\n"
322R"==(#undef DOT_PRODUCT )==""\n"
323R"==(V_tile += WINO_IC_BLOCK / VECT_SIZE; )==""\n"
324R"==(} )==""\n"
325R"==(dst[0] = M0; )==""\n"
326R"==(dst += MH_SIZE_VECT; )==""\n"
327R"==(dst[0] = M1; )==""\n"
328R"==(dst += MH_SIZE_VECT; )==""\n"
329R"==(dst[0] = M2; )==""\n"
330R"==(dst += MH_SIZE_VECT; )==""\n"
331R"==(dst[0] = M3; )==""\n"
332R"==(dst += MH_SIZE_VECT; )==""\n"
333R"==(dst[0] = M4; )==""\n"
334R"==(dst += MH_SIZE_VECT; )==""\n"
335R"==(dst[0] = M5; )==""\n"
336R"==(dst += MH_SIZE_VECT; )==""\n"
337R"==(dst[0] = M6; )==""\n"
338R"==(dst += MH_SIZE_VECT; )==""\n"
339R"==(dst[0] = M7; )==""\n"
340R"==(} )==""\n"
341R"==()==";
342}
343}
344}
345}