1 | /******************************************************************************* |
2 | * Copyright 2019-2021 Intel Corporation |
3 | * Copyright 2021 FUJITSU LIMITED |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | * you may not use this file except in compliance with the License. |
7 | * You may obtain a copy of the License at |
8 | * |
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | * |
11 | * Unless required by applicable law or agreed to in writing, software |
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | * See the License for the specific language governing permissions and |
15 | * limitations under the License. |
16 | *******************************************************************************/ |
17 | |
18 | // A quick-and-dirty implementation of |
19 | // ---------------------------------- |
20 | // tools/perf/Documentation/jitdump-specification.txt |
21 | // tools/perf/Documentation/jit-interface.txt |
22 | |
23 | // WARNING: this implementation is inherently non-thread-safe. Any calls to |
24 | // linux_perf_record_code_load() MUST be protected by a mutex. |
25 | |
26 | #ifdef __linux__ |
27 | |
28 | #include <sys/mman.h> |
29 | #include <sys/stat.h> |
30 | #include <sys/types.h> |
31 | |
32 | #include <elf.h> |
33 | #include <errno.h> |
34 | #include <fcntl.h> |
35 | #include <syscall.h> |
36 | #include <unistd.h> |
37 | |
38 | #include <climits> |
39 | #include <cstdint> |
40 | #include <cstdio> |
41 | #include <cstring> |
42 | #include <ctime> |
43 | |
44 | #include <string> |
45 | |
46 | #include "common/utils.hpp" |
47 | #include "common/verbose.hpp" |
48 | |
49 | #include "cpu/platform.hpp" |
50 | |
51 | #include "cpu/jit_utils/linux_perf/linux_perf.hpp" |
52 | |
53 | namespace dnnl { |
54 | namespace impl { |
55 | namespace cpu { |
56 | namespace jit_utils { |
57 | |
58 | class linux_perf_jitdump_t { |
59 | public: |
60 | linux_perf_jitdump_t() |
61 | : marker_addr_ {nullptr} |
62 | , marker_size_ {0} |
63 | , fd_ {-1} |
64 | , failed_ {false} |
65 | , use_tsc_ {false} { |
66 | // The initialization is lazy and nothing happens if no JIT-ed code |
67 | // need to be recorded. |
68 | } |
69 | |
70 | ~linux_perf_jitdump_t() { |
71 | write_code_close(); |
72 | finalize(); |
73 | } |
74 | |
75 | void record_code_load( |
76 | const void *code, size_t code_size, const char *code_name) { |
77 | if (is_active()) write_code_load(code, code_size, code_name); |
78 | } |
79 | |
80 | private: |
81 | bool is_active() { |
82 | if (fd_ >= 0) return true; |
83 | if (failed_) return false; |
84 | return initialize(); |
85 | } |
86 | |
87 | bool initialize() { |
88 | if (!open_file()) return fail(); |
89 | if (!create_marker()) return fail(); |
90 | if (!write_header()) return fail(); |
91 | return true; |
92 | } |
93 | |
94 | void finalize() { |
95 | close_file(); |
96 | delete_marker(); |
97 | } |
98 | |
99 | bool fail() { |
100 | finalize(); |
101 | failed_ = true; |
102 | return false; |
103 | } |
104 | |
105 | bool open_file() { |
106 | auto path_len_ok = [&](const std::string &path) { |
107 | if (path.length() >= PATH_MAX) { |
108 | if (get_verbose()) |
109 | printf("onednn_verbose,jit_perf,error," |
110 | "dump directory path '%s' is too long\n" , |
111 | path.c_str()); |
112 | return false; |
113 | } |
114 | return true; |
115 | }; |
116 | |
117 | auto complain = [](const std::string &path) { |
118 | if (get_verbose()) |
119 | printf("onednn_verbose,jit_perf,error," |
120 | "cannot create dump directory '%s' (%m)\n" , |
121 | path.c_str()); |
122 | return false; |
123 | }; |
124 | |
125 | auto make_dir = [&](const std::string &path) { |
126 | if (!path_len_ok(path)) return false; |
127 | if (mkdir(path.c_str(), 0755) == -1 && errno != EEXIST) |
128 | return complain(path); |
129 | return true; |
130 | }; |
131 | |
132 | auto make_temp_dir = [&](std::string &path) { |
133 | if (!path_len_ok(path)) return false; |
134 | if (mkdtemp(&path[0]) == nullptr) return complain(path); |
135 | return true; |
136 | }; |
137 | |
138 | std::string path(get_jit_profiling_jitdumpdir()); |
139 | path.reserve(PATH_MAX); |
140 | |
141 | if (!make_dir(path)) return false; |
142 | |
143 | path += "/.debug" ; |
144 | if (!make_dir(path)) return false; |
145 | |
146 | path += "/jit" ; |
147 | if (!make_dir(path)) return false; |
148 | |
149 | path += "/dnnl.XXXXXX" ; |
150 | if (!make_temp_dir(path)) return false; |
151 | |
152 | path += "/jit-" + std::to_string(getpid()) + ".dump" ; |
153 | if (!path_len_ok(path)) return false; |
154 | |
155 | fd_ = open(path.c_str(), O_CREAT | O_TRUNC | O_RDWR, 0666); |
156 | if (fd_ == -1) { |
157 | if (get_verbose()) |
158 | printf("onednn_verbose,jit_perf,error," |
159 | "cannot open jitdump file '%s' (%m)\n" , |
160 | path.c_str()); |
161 | return false; |
162 | } |
163 | |
164 | return true; |
165 | } |
166 | |
167 | void close_file() { |
168 | if (fd_ == -1) return; |
169 | close(fd_); |
170 | fd_ = -1; |
171 | } |
172 | |
173 | bool create_marker() { |
174 | // Perf will record an mmap() call and then will find the file we |
175 | // write the JIT-ed code to. PROT_EXEC ensures that the record is not |
176 | // ignored. |
177 | long page_size = sysconf(_SC_PAGESIZE); |
178 | if (page_size == -1) return false; |
179 | marker_size_ = (size_t)page_size; |
180 | marker_addr_ = mmap(nullptr, marker_size_, PROT_READ | PROT_EXEC, |
181 | MAP_PRIVATE, fd_, 0); |
182 | return marker_addr_ != MAP_FAILED; |
183 | } |
184 | |
185 | void delete_marker() { |
186 | if (marker_addr_) munmap(marker_addr_, marker_size_); |
187 | } |
188 | |
189 | static uint64_t get_timestamp(bool use_tsc) { |
190 | #if DNNL_X64 |
191 | if (use_tsc) { |
192 | uint32_t hi, lo; |
193 | asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); |
194 | return (((uint64_t)hi) << 32) | lo; |
195 | } |
196 | #else |
197 | if (use_tsc) { |
198 | fprintf(stderr, |
199 | "TSC timestamps is not supported. clock_gettime() is used " |
200 | "instead.\n" ); |
201 | } |
202 | #endif |
203 | |
204 | struct timespec ts; |
205 | int rc = clock_gettime(CLOCK_MONOTONIC, &ts); |
206 | if (rc) return 0; |
207 | return (ts.tv_sec * 1000000000UL) + ts.tv_nsec; |
208 | } |
209 | |
210 | static pid_t gettid() { |
211 | // https://sourceware.org/bugzilla/show_bug.cgi?id=6399 |
212 | return (pid_t)syscall(__NR_gettid); |
213 | } |
214 | |
215 | bool write_or_fail(const void *buf, size_t size) { |
216 | // Write data to the output file or do nothing if the object is in the |
217 | // failed state. Enter failed state on errors. |
218 | if (failed_) return false; |
219 | ssize_t ret = write(fd_, buf, size); |
220 | if (ret == -1) return fail(); |
221 | return true; |
222 | } |
223 | |
224 | bool () { |
225 | struct { |
226 | uint32_t magic; |
227 | uint32_t version; |
228 | uint32_t total_size; |
229 | uint32_t elf_mach; |
230 | uint32_t pad1; |
231 | uint32_t pid; |
232 | uint64_t timestamp; |
233 | uint64_t flags; |
234 | } h; |
235 | h.magic = 0x4A695444; // JITHEADER_MAGIC ('DTiJ') |
236 | h.version = 1; |
237 | h.total_size = sizeof(h); |
238 | h.elf_mach = EM_X86_64; |
239 | h.pad1 = 0; |
240 | h.pid = getpid(); |
241 | |
242 | use_tsc_ = get_jit_profiling_flags() |
243 | & DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC; |
244 | h.timestamp = get_timestamp(use_tsc_); |
245 | h.flags = use_tsc_ ? 1 : 0; |
246 | |
247 | return write_or_fail(&h, sizeof(h)); |
248 | } |
249 | |
250 | bool write_code_close() { |
251 | struct { |
252 | uint32_t id; |
253 | uint32_t total_size; |
254 | uint64_t timestamp; |
255 | } c; |
256 | c.id = 3; // JIT_CODE_CLOSE |
257 | c.total_size = sizeof(c); |
258 | c.timestamp = get_timestamp(use_tsc_); |
259 | return write_or_fail(&c, sizeof(c)); |
260 | } |
261 | |
262 | bool write_code_load( |
263 | const void *code, size_t code_size, const char *code_name) { |
264 | // XXX (rsdubtso): There is no limit on code_size or code_name. This |
265 | // may lead to huge output files. Do we care? |
266 | static uint64_t code_index = 0; |
267 | struct { |
268 | uint32_t id; |
269 | uint32_t total_size; |
270 | uint64_t timestamp; |
271 | uint32_t pid; |
272 | uint32_t tid; |
273 | uint64_t vma; |
274 | uint64_t code_addr; |
275 | uint64_t code_size; |
276 | uint64_t code_index; |
277 | } c; |
278 | c.id = 0; // JIT_CODE_LOAD |
279 | c.total_size = sizeof(c) + strlen(code_name) + 1 + code_size; |
280 | c.timestamp = get_timestamp(use_tsc_); |
281 | c.pid = getpid(); |
282 | c.tid = gettid(); |
283 | c.vma = c.code_addr = (uint64_t)code; |
284 | c.code_size = code_size; |
285 | c.code_index = code_index++; |
286 | write_or_fail(&c, sizeof(c)); |
287 | write_or_fail(code_name, strlen(code_name) + 1); |
288 | return write_or_fail(code, code_size); |
289 | } |
290 | |
291 | void *marker_addr_; |
292 | size_t marker_size_; |
293 | int fd_; |
294 | bool failed_; |
295 | bool use_tsc_; |
296 | }; |
297 | |
298 | void linux_perf_jitdump_record_code_load( |
299 | const void *code, size_t code_size, const char *code_name) { |
300 | static linux_perf_jitdump_t jitdump; |
301 | jitdump.record_code_load(code, code_size, code_name); |
302 | } |
303 | |
// Records JIT-ed code symbols into the text file /tmp/perf-<pid>.map, one
// "<start address> <size> <name>" line (address and size in hex) per symbol.
//
// The file is opened lazily on the first record_symbol() call and closed by
// the destructor. Any failure puts the object into a sticky "failed" state in
// which every later call is a no-op.
class linux_perf_jitmap_t {
public:
    linux_perf_jitmap_t() : fp_ {nullptr}, failed_ {false} {}

    // The object owns a FILE handle; a copy would close it a second time.
    linux_perf_jitmap_t(const linux_perf_jitmap_t &) = delete;
    linux_perf_jitmap_t &operator=(const linux_perf_jitmap_t &) = delete;

    ~linux_perf_jitmap_t() { close_map_file(); }

    // Appends one map entry for the `code_size` bytes at `code`, labelled
    // with the NUL-terminated `code_name`.
    void record_symbol(
            const void *code, size_t code_size, const char *code_name) {
        if (is_initialized()) write_symbol_info(code, code_size, code_name);
    }

private:
    // Returns true if the map file is open, opening it on first use.
    bool is_initialized() {
        if (fp_) return true;
        if (failed_) return false;
        return initialize();
    }

    // Creates (or truncates) the map file of the current process.
    bool open_map_file() {
        char fname[PATH_MAX];
        const int ret
                = snprintf(fname, PATH_MAX, "/tmp/perf-%d.map", getpid());
        // A negative value is an output error; >= PATH_MAX is truncation.
        if (ret < 0 || ret >= PATH_MAX) return fail();

        fp_ = fopen(fname, "w+");
        if (!fp_) return fail();
        // Line-buffered: every completed entry is flushed to the file.
        setvbuf(fp_, nullptr, _IOLBF, 0);

        return true;
    }

    // Closes the map file if it is open. Idempotent: it is reached from
    // fail() and again from the destructor.
    void close_map_file() {
        if (!fp_) return;
        fclose(fp_);
        fp_ = nullptr; // never keep a dangling handle
    }

    bool initialize() { return open_map_file(); }

    // Enters the sticky failed state and closes the file.
    // Always returns false so that callers can `return fail();`.
    bool fail() {
        close_map_file();
        failed_ = true;
        return false;
    }

    void write_symbol_info(
            const void *code, size_t code_size, const char *code_name) {
        if (failed_) return;

        const int ret = fprintf(fp_, "%llx %llx %s\n",
                (unsigned long long)code, (unsigned long long)code_size,
                code_name);

        // fprintf() reports an output error with a negative value.
        if (ret < 0) fail();
    }

    FILE *fp_; // map file handle, or nullptr when closed
    bool failed_; // sticky failure flag
};
357 | |
358 | void linux_perf_perfmap_record_code_load( |
359 | const void *code, size_t code_size, const char *code_name) { |
360 | static linux_perf_jitmap_t jitmap; |
361 | jitmap.record_symbol(code, code_size, code_name); |
362 | } |
363 | |
364 | } // namespace jit_utils |
365 | } // namespace cpu |
366 | } // namespace impl |
367 | } // namespace dnnl |
368 | |
369 | #endif |
370 | |