stack_checker.hpp source code [oneDNN/src/common/stack_checker.hpp]

1	/*******************************************************************************
2	* Copyright 2021 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#if defined(DNNL_ENABLE_STACK_CHECKER)
18
19	#ifndef __linux__
20	#error "Stack checker is supported only on Linux"
21	#endif
22
23	#ifndef DNNL_ENABLE_CONCURRENT_EXEC
24	#error "Stack checker requires using concurrent scratchpad"
25	#endif
26
27	#ifndef COMMON_STACK_CHECKER_HPP
28	#define COMMON_STACK_CHECKER_HPP
29
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

#include <pthread.h>
#include <unistd.h>
#include <sys/mman.h>

#include "common/cpp_compat.hpp"
#include "common/utils.hpp"
40
41	namespace dnnl {
42	namespace impl {
43	namespace stack_checker {
44
/** Stack checker
46	*
47	* The purpose of the stack checker is to get information about stack
48	* consumption per call stack.
49	*
50	* Motivation for introducing such a capability was excessive stack consumption
51	* for `dnnl_primitive_create`, `dnnl_primitive_execute` and GEMM APIs that
52	* resulted in a crash on the customer side.
53	*
54	* The stack checker is represented as `stack_checker_t` class. The class
55	* provides an interface called `check(...)` that is used to get the information
56	* about stack consumption.
57	* The stack checker has a capability to issue an error when the obtained
58	* stack consumption exceeds a specified limit.
59	*
60	* The stack checker can be configured with the following environment variables:
61	* - DNNL_SC_STACK_SIZE: specifies the size of the stack in bytes for the thread
62	* that runs a function that needs to be checked.
63	* The default is 8388608 bytes (8 MiB).
64	*
65	* - DNNL_SC_SOFT_STACK_LIMIT: specifies a soft limit in memory pages. When
66	* stack consumption exceeds the limit the stack checker prints an error
67	* message that contains the obtained stack consumption. The default is 5
68	* pages (20480 bytes).
69	*
* - DNNL_SC_HARD_STACK_LIMIT: specifies a hard limit in memory pages. When
* the limit is exceeded the SIGSEGV signal is raised. This can be used for
* debug purposes, for example to find the place within the call stack where
* the limit is exceeded. By default, the limit is equal to
* `stack size` / `page size`, i.e. the whole stack is available.
76	*
77	* - DNNL_SC_TRACE: enables tracing. If the soft limit is exceeded and the
78	* tracing is enabled the stack checker prints an error message. The tracing
79	* is enabled by default.
80	*
* The `stack_checker_t` class has one constructor that takes a `std::string`
* which is printed out as part of the error message when the soft limit is
* exceeded. This can be useful to give context about the function that is
* being checked.
85	*
86	* Implementation details
87	*
88	* The stack checker uses pthread API to create a new thread with
89	* an application-managed stack. The application-managed stack is a memory
90	* buffer allocated by an application and designated as a stack via
91	* a certain pthread API. Since the stack checker has control over the
92	* memory buffer it can populate it with a particular pattern. Once
93	* the thread completed execution of the function being checked it can check
94	* how much memory was actually used for the stack by checking the pattern.
95	*
96	* The stack checker is disabled in the default build configuration. It can
97	* be enabled via CMake option `DNNL_ENABLE_STACK_CHECKER=ON` at the build time.
98	*
99	* Usage example
100	*
101	* ```cpp
102	* #include "common/stack_checker.hpp"
103	*
104	* void bar() {
105	* volatile char arr[1024] = {};
106	* }
107	*
108	* int foo(int *a, int &b, int c) {
109	* bar();
110	* return 0;
111	* }
112	*
113	* int main() {
114	* int x = 5;
115	* stack_checker::stack_checker_t sc("main");
116	* return sc.check(foo, &x, std::ref(x), x);
117	* }
118	* ```
119	* If the soft limit is 3 pages then the output of this code will be the
120	* following:
121	* === Stack checker: ERROR: 'main' consumed 14824 bytes of stack while the limit is 12288 bytes. ===
122	*
* Limitations:
* - Only Linux is supported.
* - The functions being checked must be non-member functions.
* - Works only with the concurrent scratchpad because the global scratchpad is
* thread local, while the checked function runs on a separate thread.
128	*/
129
130	template <typename F, typename... Targs>
131	struct thread_args_t {
132	thread_args_t() = delete;
133	thread_args_t(const F &func, const Targs &... func_args)
134	: func(func)
135	, func_args(std::forward<Targs>(func_args)...)
136	, func_retval {} {}
137	const F &func;
138	std::tuple<Targs...> func_args;
139	typename cpp_compat::invoke_result<F *, Targs...>::type func_retval;
140	};
141
// Returns the number of elements of the tuple type `T`. A reference to a
// tuple is accepted as well (the reference is stripped first), which lets
// callers pass `decltype(some_tuple_reference)` directly.
template <typename T>
constexpr size_t get_number_args() {
    return std::tuple_size<typename std::remove_reference<T>::type>::value;
}

// The executor_t is a helper class that is used to prepare arguments for
// the function and call it.
//
// `i` is the number of tuple elements that still have to be unpacked. Each
// recursion step appends element `N - i` of `thread_args.func_args` (N being
// the tuple size) to the already unpacked arguments and recurses with
// `i - 1`, so the arguments reach the function in their original order.
template <size_t i>
struct executor_t {
    // thread_args        - object exposing `func`, `func_args` (a tuple) and
    //                      `func_retval`.
    // unpacked_func_args - tuple elements unpacked by the previous steps.
    template <typename T, typename... Targs>
    static void execute(T &thread_args, Targs &... unpacked_func_args) {
        const auto &func_args = thread_args.func_args;
        constexpr size_t idx = get_number_args<decltype(func_args)>() - i;
        executor_t<i - 1>::execute(thread_args,
                std::forward<Targs>(unpacked_func_args)...,
                std::get<idx>(func_args));
    }
};

// End of the recursion: every argument has been unpacked, so the function is
// invoked and its result is stored in `thread_args.func_retval`.
template <>
struct executor_t<0> {
    template <typename T, typename... Targs>
    static void execute(T &thread_args, Targs &... unpacked_func_args) {
        thread_args.func_retval
                = thread_args.func(std::forward<Targs>(unpacked_func_args)...);
    }
};
169
170	struct stack_checker_t {
171	stack_checker_t(const std::string &context) : context_(context) {}
172
173	template <typename F, typename... Targs>
174	typename cpp_compat::invoke_result<F *, Targs...>::type check(
175	const F &func, const Targs &... func_args) {
176
177	auto thread_args = utils::make_unique<thread_args_t<F, const Targs...>>(
178	func, std::forward<const Targs>(func_args)...);
179
180	int8_t *stack_buffer;
181	int res = posix_memalign(
182	(void **)&stack_buffer, get_page_size(), get_stack_size());
183	assert(res == `0`);
184
185	std::memset(stack_buffer, pattern_, sizeof(int8_t) * get_stack_size());
186
187	// Stack grows downwards.
188	int8_t *stack_start = stack_buffer + get_stack_size();
189	int8_t *stack_end
190	= stack_start - get_page_size() * get_hard_stack_limit();
191	size_t protected_region
192	= get_stack_size() - get_page_size() * get_hard_stack_limit();
193
194	res = mprotect(
195	stack_end - protected_region, protected_region, PROT_NONE);
196	assert(res == `0`);
197
198	pthread_t thread;
199	pthread_attr_t attr;
200	res = pthread_attr_init(&attr);
201	assert(res == `0`);
202
203	res = pthread_attr_setstack(&attr, stack_buffer, get_stack_size());
204	assert(res == `0`);
205
206	res = pthread_attr_setguardsize(&attr, `0`);
207	assert(res == `0`);
208
209	res = pthread_create(
210	&thread, &attr, worker<F, Targs...>, (void *)thread_args.get());
211	assert(res == `0`);
212
213	void stack_consumption_ptr = nullptr*;
214	res = pthread_join(thread, &stack_consumption_ptr);
215	assert(res == `0`);
216
217	auto stack_consumption
218	= reinterpret_cast<size_t>(stack_consumption_ptr);
219
220	if (is_trace_enabled()) {
221	size_t soft_stack_limit_in_bytes
222	= get_soft_stack_limit() * get_page_size();
223	if (stack_consumption > soft_stack_limit_in_bytes) {
224	printf("=== Stack checker: ERROR: '%s' consumed %lu bytes of "
225	"stack while the limit is %lu bytes. ===\n",
226	context_.c_str(), stack_consumption,
227	soft_stack_limit_in_bytes);
228	fflush(stdout);
229	}
230	}
231
232	res = pthread_attr_destroy(&attr);
233	assert(res == `0`);
234	MAYBE_UNUSED(res);
235	// POSIX Thread standard: 2.9.8 Use of Application-Managed Thread Stacks
236	// The application grants to the implementation permanent ownership of
237	// and control over the application-managed stack when the attributes
238	// object in which the stack or stackaddr attribute has been set is used
239	// free(stack_buffer);
240
241	return thread_args->func_retval;
242	}
243
244	private:
245	std::string context_;
246	static constexpr int8_t pattern_ = INT8_MAX;
247
248	// The worker function is a wrapper for the function being checked.
249	// The worker starts when a new thread is created.
250	template <typename F, typename... Types>
251	static void worker(void* *args) {
252	auto &thread_args
253	= *reinterpret_cast<thread_args_t<F, Types...> *>(args);
254	constexpr size_t n_args
255	= get_number_args<decltype(thread_args.func_args)>();
256	executor_t<n_args>::execute(thread_args);
257
258	pthread_attr_t attr;
259	int res = pthread_getattr_np(pthread_self(), &attr);
260	assert(res == `0`);
261
262	void *stack_base;
263	size_t stack_size;
264	res = pthread_attr_getstack(&attr, &stack_base, &stack_size);
265	assert(res == `0`);
266	MAYBE_UNUSED(res);
267
268	size_t stack_consumption = `0`;
269	size_t start_unprotected_buffer
270	= get_stack_size() - get_page_size() * get_hard_stack_limit();
271	for (size_t i = start_unprotected_buffer; i < stack_size; i++) {
272	if (((const int8_t *)stack_base)[i] != pattern_) {
273	stack_consumption = stack_size - i;
274	break;
275	}
276	}
277	// OS can reserve a space of size up to 4096 (page size) in the
278	// beginning of stack buffer. We shouldn't take the reserved space into
279	// account when calculating stack consumption.
280	if (stack_consumption >= get_page_size())
281	stack_consumption -= get_page_size();
282	return reinterpret_cast<void *>(stack_consumption);
283	}
284
285	static size_t get_stack_size() {
286	static const size_t stack_size
287	= getenv_int_user("SC_STACK_SIZE", `1024` * `1024` * `8`);
288	if (stack_size % get_page_size() != `0`) {
289	printf("Stack checker: DNNL_SC_STACK_SIZE is expected to be "
290	"multiple of page size, which is %lu\n",
291	get_page_size());
292	fflush(stdout);
293	std::terminate();
294	}
295	return stack_size;
296	}
297
298	static size_t get_hard_stack_limit() {
299	static const size_t hard_stack_limit = getenv_int_user(
300	"SC_HARD_STACK_LIMIT", get_stack_size() / get_page_size());
301	return hard_stack_limit;
302	}
303
304	static size_t get_soft_stack_limit() {
305	// Set up the default limit of 5 pages (20480 bytes).
306	static const size_t soft_stack_limit
307	= getenv_int_user("SC_SOFT_STACK_LIMIT", `5`);
308	return soft_stack_limit;
309	}
310
311	static bool is_trace_enabled() {
312	static const bool is_trace_enabled = getenv_int_user("SC_TRACE", `1`);
313	return is_trace_enabled;
314	}
315
316	static size_t get_page_size() {
317	static const size_t page_size = ::getpagesize();
318	return page_size;
319	}
320	};
321
322	} // namespace stack_checker
323	} // namespace impl
324	} // namespace dnnl
325
326	#endif
327	#endif
328

Browse the source code of oneDNN/src/common/stack_checker.hpp