jit_generator.hpp source code [oneDNN/src/gpu/jit/jit_generator.hpp]

1	/*******************************************************************************
2	* Copyright 2019-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_JIT_JIT_GENERATOR_HPP
18	#define GPU_JIT_JIT_GENERATOR_HPP
19
20	#include <memory>
21
22	#include "common/bfloat16.hpp"
23	#include "common/float16.hpp"
24	#include "common/nstl.hpp"
25	#include "gpu/jit/jit_generator_base.hpp"
26	#include "oneapi/dnnl/dnnl_config.h"
27
28	namespace ngen {
29	using bfloat16 = dnnl::impl::bfloat16_t;
30	using half = dnnl::impl::float16_t;
31	} // namespace ngen
32
33	#define NGEN_BFLOAT16_TYPE
34	#define NGEN_HALF_TYPE
35
36	#include "gpu/jit/ngen/ngen_opencl.hpp"
37
38	namespace dnnl {
39	namespace impl {
40	namespace gpu {
41	namespace jit {
42
43	using gpu_gen_t = ngen::HW;
44	constexpr gpu_gen_t gpu_gen9 = ngen::HW::Gen9;
45	constexpr gpu_gen_t gpu_gen11 = ngen::HW::Gen11;
46	constexpr gpu_gen_t gpu_xe_lp = ngen::HW::XeLP;
47	constexpr gpu_gen_t gpu_xe_hp = ngen::HW::XeHP;
48	constexpr gpu_gen_t gpu_xe_hpg = ngen::HW::XeHPG;
49	constexpr gpu_gen_t gpu_xe_hpc = ngen::HW::XeHPC;
50
51	// nGEN jit generator
52	//
53	// The main purpose of this header file is to provide extra features for nGEN
54	// kernel generator, e.g. additional macros and debugging capabilities.
55	//
56	// Jit generator provides additional memory to simplify kernel debugging. This
57	// memory is allocated using Shared Virtual Memory (SVM) feature in OpenCL 2.0.
58	// SVM enables the host and device portions of an OpenCL application to
59	// seamlessly share pointers and complex pointer-containing data-structures.
60	// This memory can be used to dump state of GPU registers or view GPU memory on
61	// the host in debugger.
62	//
63	// In order to use debug memory:
64	// 1. Allocate it using 'void jit_generator::dbg_alloc(cl_context context)'
65	// 2. Get memory pointer using 'void *jit_generator::dbg_memory()'
66	// 3. Pass it as extra OpenCL kernel argument and define it as new argument in
67	// kernel interface at corresponding order.
68	// 4. Set a breakpoint after 'dnnl_stream_wait()', memory will be available on
69	// the host side after kernel execution.
70	//
71	// A short example below demonstrates how to use debug memory:
72	//
73	// ``` c++
74	// status_t primitive_impl_t::execute(const exec_ctx_t &ctx) {
75	// ...
76	// auto gpu_engine = utils::downcast<ocl_gpu_engine *>(engine);
77	// jit_generator->dbg_alloc(gpu_engine->context());
78	// void *dbg_mem = jit_generator->dbg_memory();
79	// ...
80	// compute::kernel_arg_list_t arg_list;
81	// arg_list.set(0, src);
82	// arg_list.set(1, dst);
83	// arg_list.set(2, dbg_mem, kernel_arg_t::kind_t::svm);
84	// ...
85	// parallel_for(ctx, nd_range, kernel_, arg_list);
86	// }
87	//
88	// ngen_kernel_t() : jit_generator<...>() {
89	// externalName("ngen_kernel");
90	// newArgument("src", GlobalPtr);
91	// newArgument("dst", GlobalPtr);
92	// newArgument("dbg_mem", GlobalPtr);
93	// finalizeInterface();
94	// ...
95	// auto header = r32;
96	// auto data = r64;
97	// mov<uint64_t>(1, r64, getArgument("dbg_mem"));
98	// store(1, scattered_dword(), A64, header, data);
99	// ...
100	// }
101	// ```
102	//
103
104	template <gpu_gen_t hw>
105	struct jit_eltwise_injector_f32;
106
107	template <gpu_gen_t hw>
108	struct jit_post_op_injector;
109
110	template <gpu_gen_t hw>
111	class jit_generator : public ngen::OpenCLCodeGenerator<hw>,
112	public jit_generator_base {
113	friend struct jit_eltwise_injector_f32<hw>;
114
115	friend struct jit_post_op_injector<hw>;
116
117	private:
118	#ifdef CL_VERSION_2_0
119	struct svm_deleter {
120	cl_context context_;
121
122	void operator()(void ptr) noexcept* {
123	if (ptr) clSVMFree(context_, ptr);
124	}
125	};
126	std::unique_ptr<void, svm_deleter> dbg_memory_;
127	#endif
128
129	public:
130	jit_generator() = default;
131
132	const char kernel_name() const* override {
133	return ngen::OpenCLCodeGenerator<hw>::getExternalName().c_str();
134	}
135
136	cl_kernel get_kernel(cl_context context, cl_device_id device) override {
137	return ngen::OpenCLCodeGenerator<hw>::getKernel(context, device);
138	}
139
140	#ifdef CL_VERSION_2_0
141	void dbg_alloc(cl_context context);
142	void dbg_memory() const* { return dbg_memory_.get(); }
143	#endif
144
145	void emath(ngen::MathFunction fc, int simd, ngen::GRF dst, ngen::GRF src) {
146	const int max_exec_size = ngen::GRF::bytes(hw) / sizeof(float);
147	for (; simd > `0`; simd -= max_exec_size, dst ++, src ++)
148	this->math(nstl::min(simd, max_exec_size), fc, dst, src);
149	}
150	void eexp(int simd, const ngen::GRF &dst, const ngen::GRF &src) {
151	emath(ngen::MathFunction::exp, simd, dst, src);
152	}
153	void einv(int simd, const ngen::GRF &dst, const ngen::GRF &src) {
154	emath(ngen::MathFunction::inv, simd, dst, src);
155	}
156	};
157
#ifdef CL_VERSION_2_0
// Allocates a 1 MiB fine-grain SVM buffer in `context`, hands ownership to
// dbg_memory_ (releasing any previously held buffer) and fills it with the
// 0xcd byte pattern so untouched regions are easy to spot.
//
// If the allocation fails, dbg_memory_ ends up empty and dbg_memory() returns
// nullptr; the fill is skipped so a null pointer is never written to.
template <gpu_gen_t hw>
void jit_generator<hw>::dbg_alloc(cl_context context) {
    constexpr size_t size = 1048576;
    void *mem = clSVMAlloc(
            context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, size, 0);
    dbg_memory_ = decltype(dbg_memory_)(mem, svm_deleter {context});
    // clSVMAlloc reports failure by returning a null pointer.
    if (mem) memset(mem, 0xcd, size);
}
#endif
168
169	} // namespace jit
170	} // namespace gpu
171	} // namespace impl
172	} // namespace dnnl
173
174	#endif // GPU_JIT_JIT_GENERATOR_HPP
175

Browse the source code of oneDNN/src/gpu/jit/jit_generator.hpp