1/*******************************************************************************
2* Copyright 2019-2021 Intel Corporation
3* Copyright 2021 FUJITSU LIMITED
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*******************************************************************************/
17
18// A quick-and-dirty implementation of
19// ----------------------------------
20// tools/perf/Documentation/jitdump-specification.txt
21// tools/perf/Documentation/jit-interface.txt
22
// WARNING: this implementation is inherently non-thread-safe. Any calls to
// linux_perf_jitdump_record_code_load() or
// linux_perf_perfmap_record_code_load() MUST be protected by a mutex.
25
26#ifdef __linux__
27
28#include <sys/mman.h>
29#include <sys/stat.h>
30#include <sys/types.h>
31
32#include <elf.h>
33#include <errno.h>
34#include <fcntl.h>
35#include <syscall.h>
36#include <unistd.h>
37
38#include <climits>
39#include <cstdint>
40#include <cstdio>
41#include <cstring>
42#include <ctime>
43
44#include <string>
45
46#include "common/utils.hpp"
47#include "common/verbose.hpp"
48
49#include "cpu/platform.hpp"
50
51#include "cpu/jit_utils/linux_perf/linux_perf.hpp"
52
53namespace dnnl {
54namespace impl {
55namespace cpu {
56namespace jit_utils {
57
58class linux_perf_jitdump_t {
59public:
60 linux_perf_jitdump_t()
61 : marker_addr_ {nullptr}
62 , marker_size_ {0}
63 , fd_ {-1}
64 , failed_ {false}
65 , use_tsc_ {false} {
66 // The initialization is lazy and nothing happens if no JIT-ed code
67 // need to be recorded.
68 }
69
70 ~linux_perf_jitdump_t() {
71 write_code_close();
72 finalize();
73 }
74
75 void record_code_load(
76 const void *code, size_t code_size, const char *code_name) {
77 if (is_active()) write_code_load(code, code_size, code_name);
78 }
79
80private:
81 bool is_active() {
82 if (fd_ >= 0) return true;
83 if (failed_) return false;
84 return initialize();
85 }
86
87 bool initialize() {
88 if (!open_file()) return fail();
89 if (!create_marker()) return fail();
90 if (!write_header()) return fail();
91 return true;
92 }
93
94 void finalize() {
95 close_file();
96 delete_marker();
97 }
98
99 bool fail() {
100 finalize();
101 failed_ = true;
102 return false;
103 }
104
105 bool open_file() {
106 auto path_len_ok = [&](const std::string &path) {
107 if (path.length() >= PATH_MAX) {
108 if (get_verbose())
109 printf("onednn_verbose,jit_perf,error,"
110 "dump directory path '%s' is too long\n",
111 path.c_str());
112 return false;
113 }
114 return true;
115 };
116
117 auto complain = [](const std::string &path) {
118 if (get_verbose())
119 printf("onednn_verbose,jit_perf,error,"
120 "cannot create dump directory '%s' (%m)\n",
121 path.c_str());
122 return false;
123 };
124
125 auto make_dir = [&](const std::string &path) {
126 if (!path_len_ok(path)) return false;
127 if (mkdir(path.c_str(), 0755) == -1 && errno != EEXIST)
128 return complain(path);
129 return true;
130 };
131
132 auto make_temp_dir = [&](std::string &path) {
133 if (!path_len_ok(path)) return false;
134 if (mkdtemp(&path[0]) == nullptr) return complain(path);
135 return true;
136 };
137
138 std::string path(get_jit_profiling_jitdumpdir());
139 path.reserve(PATH_MAX);
140
141 if (!make_dir(path)) return false;
142
143 path += "/.debug";
144 if (!make_dir(path)) return false;
145
146 path += "/jit";
147 if (!make_dir(path)) return false;
148
149 path += "/dnnl.XXXXXX";
150 if (!make_temp_dir(path)) return false;
151
152 path += "/jit-" + std::to_string(getpid()) + ".dump";
153 if (!path_len_ok(path)) return false;
154
155 fd_ = open(path.c_str(), O_CREAT | O_TRUNC | O_RDWR, 0666);
156 if (fd_ == -1) {
157 if (get_verbose())
158 printf("onednn_verbose,jit_perf,error,"
159 "cannot open jitdump file '%s' (%m)\n",
160 path.c_str());
161 return false;
162 }
163
164 return true;
165 }
166
167 void close_file() {
168 if (fd_ == -1) return;
169 close(fd_);
170 fd_ = -1;
171 }
172
173 bool create_marker() {
174 // Perf will record an mmap() call and then will find the file we
175 // write the JIT-ed code to. PROT_EXEC ensures that the record is not
176 // ignored.
177 long page_size = sysconf(_SC_PAGESIZE);
178 if (page_size == -1) return false;
179 marker_size_ = (size_t)page_size;
180 marker_addr_ = mmap(nullptr, marker_size_, PROT_READ | PROT_EXEC,
181 MAP_PRIVATE, fd_, 0);
182 return marker_addr_ != MAP_FAILED;
183 }
184
185 void delete_marker() {
186 if (marker_addr_) munmap(marker_addr_, marker_size_);
187 }
188
189 static uint64_t get_timestamp(bool use_tsc) {
190#if DNNL_X64
191 if (use_tsc) {
192 uint32_t hi, lo;
193 asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
194 return (((uint64_t)hi) << 32) | lo;
195 }
196#else
197 if (use_tsc) {
198 fprintf(stderr,
199 "TSC timestamps is not supported. clock_gettime() is used "
200 "instead.\n");
201 }
202#endif
203
204 struct timespec ts;
205 int rc = clock_gettime(CLOCK_MONOTONIC, &ts);
206 if (rc) return 0;
207 return (ts.tv_sec * 1000000000UL) + ts.tv_nsec;
208 }
209
210 static pid_t gettid() {
211 // https://sourceware.org/bugzilla/show_bug.cgi?id=6399
212 return (pid_t)syscall(__NR_gettid);
213 }
214
215 bool write_or_fail(const void *buf, size_t size) {
216 // Write data to the output file or do nothing if the object is in the
217 // failed state. Enter failed state on errors.
218 if (failed_) return false;
219 ssize_t ret = write(fd_, buf, size);
220 if (ret == -1) return fail();
221 return true;
222 }
223
224 bool write_header() {
225 struct {
226 uint32_t magic;
227 uint32_t version;
228 uint32_t total_size;
229 uint32_t elf_mach;
230 uint32_t pad1;
231 uint32_t pid;
232 uint64_t timestamp;
233 uint64_t flags;
234 } h;
235 h.magic = 0x4A695444; // JITHEADER_MAGIC ('DTiJ')
236 h.version = 1;
237 h.total_size = sizeof(h);
238 h.elf_mach = EM_X86_64;
239 h.pad1 = 0;
240 h.pid = getpid();
241
242 use_tsc_ = get_jit_profiling_flags()
243 & DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC;
244 h.timestamp = get_timestamp(use_tsc_);
245 h.flags = use_tsc_ ? 1 : 0;
246
247 return write_or_fail(&h, sizeof(h));
248 }
249
250 bool write_code_close() {
251 struct {
252 uint32_t id;
253 uint32_t total_size;
254 uint64_t timestamp;
255 } c;
256 c.id = 3; // JIT_CODE_CLOSE
257 c.total_size = sizeof(c);
258 c.timestamp = get_timestamp(use_tsc_);
259 return write_or_fail(&c, sizeof(c));
260 }
261
262 bool write_code_load(
263 const void *code, size_t code_size, const char *code_name) {
264 // XXX (rsdubtso): There is no limit on code_size or code_name. This
265 // may lead to huge output files. Do we care?
266 static uint64_t code_index = 0;
267 struct {
268 uint32_t id;
269 uint32_t total_size;
270 uint64_t timestamp;
271 uint32_t pid;
272 uint32_t tid;
273 uint64_t vma;
274 uint64_t code_addr;
275 uint64_t code_size;
276 uint64_t code_index;
277 } c;
278 c.id = 0; // JIT_CODE_LOAD
279 c.total_size = sizeof(c) + strlen(code_name) + 1 + code_size;
280 c.timestamp = get_timestamp(use_tsc_);
281 c.pid = getpid();
282 c.tid = gettid();
283 c.vma = c.code_addr = (uint64_t)code;
284 c.code_size = code_size;
285 c.code_index = code_index++;
286 write_or_fail(&c, sizeof(c));
287 write_or_fail(code_name, strlen(code_name) + 1);
288 return write_or_fail(code, code_size);
289 }
290
291 void *marker_addr_;
292 size_t marker_size_;
293 int fd_;
294 bool failed_;
295 bool use_tsc_;
296};
297
298void linux_perf_jitdump_record_code_load(
299 const void *code, size_t code_size, const char *code_name) {
300 static linux_perf_jitdump_t jitdump;
301 jitdump.record_code_load(code, code_size, code_name);
302}
303
// Writer for the perf symbol map file /tmp/perf-<pid>.map.
//
// Each recorded symbol becomes one text line "<start> <size> <name>" with
// start and size printed in hexadecimal. The file is opened lazily on the
// first record_symbol() call and closed by the destructor. After any error
// the object enters a sticky failed state and ignores further requests.
//
// The class is not thread-safe: callers must serialize record_symbol().
class linux_perf_jitmap_t {
public:
    linux_perf_jitmap_t() : fp_ {nullptr}, failed_ {false} {}

    // The object owns a FILE handle; a copy would close it twice.
    linux_perf_jitmap_t(const linux_perf_jitmap_t &) = delete;
    linux_perf_jitmap_t &operator=(const linux_perf_jitmap_t &) = delete;

    ~linux_perf_jitmap_t() { close_map_file(); }

    // Appends one symbol line for the region [code, code + code_size).
    void record_symbol(
            const void *code, size_t code_size, const char *code_name) {
        if (is_initialized()) write_symbol_info(code, code_size, code_name);
    }

private:
    // Returns true if the map file is open, opening it on first use.
    bool is_initialized() {
        if (fp_) return true;
        if (failed_) return false;
        return initialize();
    }

    // Creates (or truncates) the map file of the current process.
    bool open_map_file() {
        char fname[PATH_MAX];
        const int ret = snprintf(fname, PATH_MAX, "/tmp/perf-%d.map",
                static_cast<int>(getpid()));
        // A negative value signals an output error, a value >= PATH_MAX
        // signals truncation; fname is unusable in both cases.
        if (ret < 0 || ret >= PATH_MAX) return fail();

        fp_ = fopen(fname, "w+");
        if (!fp_) return fail();
        // Line-buffered: every completed symbol line is flushed to the file.
        setvbuf(fp_, nullptr, _IOLBF, 0);

        return true;
    }

    void close_map_file() {
        if (!fp_) return;
        fclose(fp_);
        fp_ = nullptr; // the handle must not be used or closed again
    }

    bool initialize() { return open_map_file(); }

    // Closes the file and enters the sticky failed state.
    // Always returns false so it can be used as `return fail();`.
    bool fail() {
        close_map_file();
        failed_ = true;
        return false;
    }

    void write_symbol_info(
            const void *code, size_t code_size, const char *code_name) {
        if (failed_ || !fp_) return;

        const int ret = fprintf(fp_, "%llx %llx %s\n",
                (unsigned long long)code, (unsigned long long)code_size,
                code_name);

        // fprintf() reports errors with a negative return value.
        if (ret < 0) fail();
    }

    FILE *fp_; // map file handle, nullptr when closed
    bool failed_; // sticky error state
};
357
358void linux_perf_perfmap_record_code_load(
359 const void *code, size_t code_size, const char *code_name) {
360 static linux_perf_jitmap_t jitmap;
361 jitmap.record_symbol(code, code_size, code_name);
362}
363
364} // namespace jit_utils
365} // namespace cpu
366} // namespace impl
367} // namespace dnnl
368
369#endif
370