1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18
19#include <stdio.h>
20#include <thread>
21#include <gflags/gflags.h>
22#include "butil/files/file_enumerator.h"
23#include "butil/file_util.h" // butil::FilePath
24#include "butil/popen.h" // butil::read_command_output
25#include "butil/fd_guard.h" // butil::fd_guard
26#include "brpc/log.h"
27#include "brpc/controller.h"
28#include "brpc/server.h"
29#include "brpc/reloadable_flags.h"
30#include "brpc/builtin/pprof_perl.h"
31#include "brpc/builtin/flamegraph_perl.h"
32#include "brpc/builtin/hotspots_service.h"
33#include "brpc/details/tcmalloc_extension.h"
34
// CPU profiler entry points, declared as weak symbols. DoProfiling() compares
// both addresses against NULL before use and answers "CPU profiler is not
// enabled" when either one is missing.
// NOTE(review): presumably supplied by gperftools' libprofiler when it is
// linked into the binary -- confirm.
extern "C" {
int __attribute__((weak)) ProfilerStart(const char* fname);
void __attribute__((weak)) ProfilerStop();
}
39
// Forward declarations of the contention profiler entry points used by
// DoProfiling(), which treats a false return from ContentionProfilerStart()
// as "another profiler is running".
namespace bthread {
bool ContentionProfilerStart(const char* filename);
void ContentionProfilerStop();
}
44
45
46namespace brpc {
// Output formats selectable through the `display_type' query.
enum class DisplayType{
    kUnknown,     // returned by StringToDisplayType() for unrecognized names
    kDot,         // used when the request carries no `display_type' query
#if defined(OS_LINUX)
    kFlameGraph,  // only compiled in when OS_LINUX is defined
#endif
    kText
};
55
56static const char* DisplayTypeToString(DisplayType type) {
57 switch (type) {
58 case DisplayType::kDot: return "dot";
59#if defined(OS_LINUX)
60 case DisplayType::kFlameGraph: return "flame";
61#endif
62 case DisplayType::kText: return "text";
63 default: return "unknown";
64 }
65}
66
67static DisplayType StringToDisplayType(const std::string& val) {
68 static butil::CaseIgnoredFlatMap<DisplayType>* display_type_map;
69 static std::once_flag flag;
70 std::call_once(flag, []() {
71 display_type_map = new butil::CaseIgnoredFlatMap<DisplayType>;
72 display_type_map->init(10);
73 (*display_type_map)["dot"] = DisplayType::kDot;
74#if defined(OS_LINUX)
75 (*display_type_map)["flame"] = DisplayType::kFlameGraph;
76#endif
77 (*display_type_map)["text"] = DisplayType::kText;
78 });
79 auto type = display_type_map->seek(val);
80 if (type == nullptr) {
81 return DisplayType::kUnknown;
82 }
83 return *type;
84}
85
86static std::string DisplayTypeToPProfArgument(DisplayType type) {
87 switch (type) {
88#if defined(OS_LINUX)
89 case DisplayType::kDot: return " --dot ";
90 case DisplayType::kFlameGraph: return " --collapsed ";
91 case DisplayType::kText: return " --text ";
92#elif defined(OS_MACOSX)
93 case DisplayType::kDot: return " -dot ";
94 case DisplayType::kText: return " -text ";
95#endif
96 default: return " unknown type ";
97 }
98}
99
100static std::string GeneratePerlScriptPath(const std::string& filename) {
101 std::string path;
102 path.reserve(FLAGS_rpc_profiling_dir.size() + 1 + filename.size());
103 path += FLAGS_rpc_profiling_dir;
104 path.push_back('/');
105 path += filename;
106 return std::move(path);
107}
108
// Defined outside this block; StartProfiling() reads it to decide whether the
// CPU profiler is reported as enabled.
extern bool cpu_profiler_enabled;

// ReadSeconds() clamps the requested `seconds' query to this value.
DEFINE_int32(max_profiling_seconds, 300, "upper limit of running time of profilers");
BRPC_VALIDATE_GFLAG(max_profiling_seconds, NonNegativeInteger);

// NOTE(review): not read in the visible part of this file; presumably bounds
// the number of profile files kept per profiler type -- confirm.
DEFINE_int32(max_profiles_kept, 32,
             "max profiles kept for cpu/heap/growth/contention respectively");
BRPC_VALIDATE_GFLAG(max_profiles_kept, PassValidate);
117
// File names of the perl scripts that DisplayResult() writes under
// FLAGS_rpc_profiling_dir (see GeneratePerlScriptPath) and then runs.
static const char* const PPROF_FILENAME = "pprof.pl";
static const char* const FLAMEGRAPH_FILENAME = "flamegraph.pl";
// Profiling duration used by ReadSeconds() when the request has no `seconds'
// query.
static int DEFAULT_PROFILING_SECONDS = 10;
// NOTE(review): not referenced in the visible part of this file -- confirm
// where it is enforced.
static size_t CONCURRENT_PROFILING_LIMIT = 256;
122
// A request that arrived while a profiling of the same type was already
// running. DoProfiling() queues it in ProfilingEnvironment::waiters and it is
// answered later, when the running profiling finishes or fails.
// Aggregate-initialized as { cntl, done }; keep the field order.
struct ProfilingWaiter {
    Controller* cntl;                   // controller of the queued request
    ::google::protobuf::Closure* done;  // run once the response has been filled
};
127
128// Information of the client doing profiling.
129struct ProfilingClient {
130 ProfilingClient() : end_us(0), seconds(0), id(0) {}
131
132 int64_t end_us;
133 int seconds;
134 int64_t id;
135 butil::EndPoint point;
136};
137
138struct ProfilingResult {
139 ProfilingResult() : id(0), status_code(HTTP_STATUS_OK) {}
140
141 int64_t id;
142 int status_code;
143 butil::IOBuf result;
144};
145
// Set by DisplayResult() once the bundled perl scripts have been written to
// disk; reset there when a script is found to be missing so that they are
// written again.
static bool g_written_pprof_perl = false;
147
// Bookkeeping for one kind of profiling. Instances are aggregate-initialized
// (see g_env), so the field order must not change.
struct ProfilingEnvironment {
    // Held by DoProfiling()/ConsumeWaiters()/StartProfiling() while they read
    // or update the fields below.
    pthread_mutex_t mutex;
    // Last id handed out to a ProfilingClient.
    int64_t cur_id;
    // Request currently driving a profiling run, NULL when none is running.
    ProfilingClient* client;
    // Requests queued behind `client'; allocated lazily, may be NULL.
    std::vector<ProfilingWaiter>* waiters;
    // Response of the last finished run; allocated lazily, may be NULL.
    ProfilingResult* cached_result;
};
155
// One environment per ProfilingType; the array is indexed by the type value
// (g_env[type]), so each kind of profiling has its own lock, running client,
// waiter queue and cached result.
static ProfilingEnvironment g_env[4] = {
    { PTHREAD_MUTEX_INITIALIZER, 0, NULL, NULL, NULL },
    { PTHREAD_MUTEX_INITIALIZER, 0, NULL, NULL, NULL },
    { PTHREAD_MUTEX_INITIALIZER, 0, NULL, NULL, NULL },
    { PTHREAD_MUTEX_INITIALIZER, 0, NULL, NULL, NULL }
};
163
164// The `content' should be small so that it can be written into file in one
165// fwrite (at most time).
166static bool WriteSmallFile(const char* filepath_in,
167 const butil::StringPiece& content) {
168 butil::File::Error error;
169 butil::FilePath path(filepath_in);
170 butil::FilePath dir = path.DirName();
171 if (!butil::CreateDirectoryAndGetError(dir, &error)) {
172 LOG(ERROR) << "Fail to create directory=`" << dir.value()
173 << "', " << error;
174 return false;
175 }
176 FILE* fp = fopen(path.value().c_str(), "w");
177 if (NULL == fp) {
178 LOG(ERROR) << "Fail to open `" << path.value() << '\'';
179 return false;
180 }
181 bool ret = true;
182 if (fwrite(content.data(), content.size(), 1UL, fp) != 1UL) {
183 LOG(ERROR) << "Fail to write into " << path.value();
184 ret = false;
185 }
186 CHECK_EQ(0, fclose(fp));
187 return ret;
188}
189
190static bool WriteSmallFile(const char* filepath_in,
191 const butil::IOBuf& content) {
192 butil::File::Error error;
193 butil::FilePath path(filepath_in);
194 butil::FilePath dir = path.DirName();
195 if (!butil::CreateDirectoryAndGetError(dir, &error)) {
196 LOG(ERROR) << "Fail to create directory=`" << dir.value()
197 << "', " << error;
198 return false;
199 }
200 FILE* fp = fopen(path.value().c_str(), "w");
201 if (NULL == fp) {
202 LOG(ERROR) << "Fail to open `" << path.value() << '\'';
203 return false;
204 }
205 butil::IOBufAsZeroCopyInputStream iter(content);
206 const void* data = NULL;
207 int size = 0;
208 while (iter.Next(&data, &size)) {
209 if (fwrite(data, size, 1UL, fp) != 1UL) {
210 LOG(ERROR) << "Fail to write into " << path.value();
211 fclose(fp);
212 return false;
213 }
214 }
215 fclose(fp);
216 return true;
217}
218
219static int ReadSeconds(const Controller* cntl) {
220 int seconds = DEFAULT_PROFILING_SECONDS;
221 const std::string* param =
222 cntl->http_request().uri().GetQuery("seconds");
223 if (param != NULL) {
224 char* endptr = NULL;
225 const long sec = strtol(param->c_str(), &endptr, 10);
226 if (endptr == param->c_str() + param->length()) {
227 seconds = sec;
228 } else {
229 return -1;
230 }
231 }
232 seconds = std::min(seconds, FLAGS_max_profiling_seconds);
233 return seconds;
234}
235
// Returns a pointer to the part of `*full_base_name' that follows the last
// '/', or to the whole string when it contains no '/'. The pointer refers to
// the string's own storage. A NULL argument yields NULL.
static const char* GetBaseName(const std::string* full_base_name) {
    if (!full_base_name) {
        return NULL;
    }
    const size_t last_slash = full_base_name->rfind('/');
    const size_t start =
        (last_slash == std::string::npos) ? 0 : last_slash + 1;
    return full_base_name->c_str() + start;
}
248
// C-string flavour of GetBaseName: returns a pointer into `full_base_name'
// just past its last '/', or `full_base_name' itself when it has no '/'.
// A NULL argument yields NULL.
static const char* GetBaseName(const char* full_base_name) {
    if (full_base_name == NULL) {
        return NULL;
    }
    const char* base = full_base_name;
    for (const char* p = full_base_name; *p != '\0'; ++p) {
        if (*p == '/') {
            // The base name starts right after the most recent slash.
            base = p + 1;
        }
    }
    return base;
}
259
260// Test if path of the profile is valid.
261// NOTE: this function MUST be applied to all parameters finally passed to
262// system related functions (popen/system/exec ...) to avoid potential
263// injections from URL and other user inputs.
264static bool ValidProfilePath(const butil::StringPiece& path) {
265 if (!path.starts_with(FLAGS_rpc_profiling_dir)) {
266 // Must be under the directory.
267 return false;
268 }
269 int consecutive_dot_count = 0;
270 for (size_t i = 0; i < path.size(); ++i) {
271 const char c = path[i];
272 if (c == '.') {
273 ++consecutive_dot_count;
274 if (consecutive_dot_count >= 2) {
275 // Disallow consecutive dots to go to upper level directories.
276 return false;
277 } else {
278 continue;
279 }
280 } else {
281 consecutive_dot_count = 0;
282 }
283 if (!isalpha(c) && !isdigit(c) &&
284 c != '_' && c != '-' && c != '/') {
285 return false;
286 }
287 }
288 return true;
289}
290
291static int MakeCacheName(char* cache_name, size_t len,
292 const char* prof_name,
293 const char* base_name,
294 DisplayType display_type,
295 bool show_ccount) {
296 if (base_name) {
297 return snprintf(cache_name, len, "%s.cache/base_%s.%s%s", prof_name,
298 base_name,
299 DisplayTypeToString(display_type),
300 (show_ccount ? ".ccount" : ""));
301 } else {
302 return snprintf(cache_name, len, "%s.cache/%s%s", prof_name,
303 DisplayTypeToString(display_type),
304 (show_ccount ? ".ccount" : ""));
305
306 }
307}
308
309static int MakeProfName(ProfilingType type, char* buf, size_t buf_len) {
310 int nr = snprintf(buf, buf_len, "%s/%s/", FLAGS_rpc_profiling_dir.c_str(),
311 GetProgramChecksum());
312 if (nr < 0) {
313 return -1;
314 }
315 buf += nr;
316 buf_len -= nr;
317
318 time_t rawtime;
319 time(&rawtime);
320 struct tm* timeinfo = localtime(&rawtime);
321 const size_t nw = strftime(buf, buf_len, "%Y%m%d.%H%M%S", timeinfo);
322 buf += nw;
323 buf_len -= nw;
324
325 // We have checksum in the path, getpid() is not necessary now.
326 snprintf(buf, buf_len, ".%s", ProfilingType2String(type));
327 return 0;
328}
329
330static void ConsumeWaiters(ProfilingType type, const Controller* cur_cntl,
331 std::vector<ProfilingWaiter>* waiters) {
332 waiters->clear();
333 if ((int)type >= (int)arraysize(g_env)) {
334 LOG(ERROR) << "Invalid type=" << type;
335 return;
336 }
337 ProfilingEnvironment& env = g_env[type];
338 if (env.client) {
339 BAIDU_SCOPED_LOCK(env.mutex);
340 if (env.client == NULL) {
341 return;
342 }
343 if (env.cached_result == NULL) {
344 env.cached_result = new ProfilingResult;
345 }
346 env.cached_result->id = env.client->id;
347 env.cached_result->status_code =
348 cur_cntl->http_response().status_code();
349 env.cached_result->result = cur_cntl->response_attachment();
350
351 delete env.client;
352 env.client = NULL;
353 if (env.waiters) {
354 env.waiters->swap(*waiters);
355 }
356 }
357}
358
359// This function is always called with g_env[type].mutex UNLOCKED.
360static void NotifyWaiters(ProfilingType type, const Controller* cur_cntl,
361 const std::string* view) {
362 if (view != NULL) {
363 return;
364 }
365 std::vector<ProfilingWaiter> saved_waiters;
366 CHECK(g_env[type].client);
367 ConsumeWaiters(type, cur_cntl, &saved_waiters);
368 for (size_t i = 0; i < saved_waiters.size(); ++i) {
369 Controller* cntl = saved_waiters[i].cntl;
370 ::google::protobuf::Closure* done = saved_waiters[i].done;
371 cntl->http_response().set_status_code(
372 cur_cntl->http_response().status_code());
373 cntl->response_attachment().append(cur_cntl->response_attachment());
374 done->Run();
375 }
376}
377
378#if defined(OS_MACOSX)
379static bool check_GOOGLE_PPROF_BINARY_PATH() {
380 char* str = getenv("GOOGLE_PPROF_BINARY_PATH");
381 if (str == NULL) {
382 return false;
383 }
384 butil::fd_guard fd(open(str, O_RDONLY));
385 if (fd < 0) {
386 return false;
387 }
388 return true;
389}
390
391static bool has_GOOGLE_PPROF_BINARY_PATH() {
392 static bool val = check_GOOGLE_PPROF_BINARY_PATH();
393 return val;
394}
395#endif
396
397static void DisplayResult(Controller* cntl,
398 google::protobuf::Closure* done,
399 const char* prof_name,
400 const butil::IOBuf& result_prefix) {
401 ClosureGuard done_guard(done);
402 butil::IOBuf prof_result;
403 if (cntl->IsCanceled()) {
404 // If the page is refreshed, older connections are likely to be
405 // already closed by browser.
406 return;
407 }
408 butil::IOBuf& resp = cntl->response_attachment();
409 const bool use_html = UseHTML(cntl->http_request());
410 const bool show_ccount = cntl->http_request().uri().GetQuery("ccount");
411 const std::string* base_name = cntl->http_request().uri().GetQuery("base");
412 const std::string* display_type_query = cntl->http_request().uri().GetQuery("display_type");
413 DisplayType display_type = DisplayType::kDot;
414 if (display_type_query) {
415 display_type = StringToDisplayType(*display_type_query);
416 if (display_type == DisplayType::kUnknown) {
417 return cntl->SetFailed(EINVAL, "Invalid display_type=%s", display_type_query->c_str());
418 }
419 }
420 if (base_name != NULL) {
421 if (!ValidProfilePath(*base_name)) {
422 return cntl->SetFailed(EINVAL, "Invalid query `base'");
423 }
424 if (!butil::PathExists(butil::FilePath(*base_name))) {
425 return cntl->SetFailed(
426 EINVAL, "The profile denoted by `base' does not exist");
427 }
428 }
429 butil::IOBufBuilder os;
430 os << result_prefix;
431 char expected_result_name[256];
432 MakeCacheName(expected_result_name, sizeof(expected_result_name),
433 prof_name, GetBaseName(base_name),
434 display_type, show_ccount);
435 // Try to read cache first.
436 FILE* fp = fopen(expected_result_name, "r");
437 if (fp != NULL) {
438 bool succ = false;
439 char buffer[1024];
440 while (1) {
441 size_t nr = fread(buffer, 1, sizeof(buffer), fp);
442 if (nr != 0) {
443 prof_result.append(buffer, nr);
444 }
445 if (nr != sizeof(buffer)) {
446 if (feof(fp)) {
447 succ = true;
448 break;
449 } else if (ferror(fp)) {
450 LOG(ERROR) << "Encountered error while reading for "
451 << expected_result_name;
452 break;
453 }
454 // retry;
455 }
456 }
457 PLOG_IF(ERROR, fclose(fp) != 0) << "Fail to close fp";
458 if (succ) {
459 RPC_VLOG << "Hit cache=" << expected_result_name;
460 os.move_to(resp);
461 if (use_html) {
462 resp.append("<pre>");
463 }
464 resp.append(prof_result);
465 if (use_html) {
466 resp.append("</pre></body></html>");
467 }
468 return;
469 }
470 }
471
472 std::ostringstream cmd_builder;
473
474 std::string pprof_tool{GeneratePerlScriptPath(PPROF_FILENAME)};
475 std::string flamegraph_tool{GeneratePerlScriptPath(FLAMEGRAPH_FILENAME)};
476
477#if defined(OS_LINUX)
478 cmd_builder << "perl " << pprof_tool
479 << DisplayTypeToPProfArgument(display_type)
480 << (show_ccount ? " --contention " : "");
481 if (base_name) {
482 cmd_builder << "--base " << *base_name << ' ';
483 }
484
485 cmd_builder << GetProgramName() << " " << prof_name;
486
487 if (display_type == DisplayType::kFlameGraph) {
488 // For flamegraph, we don't care about pprof error msg,
489 // which will cause confusing messages in the final result.
490 cmd_builder << " 2>/dev/null " << " | " << "perl " << flamegraph_tool;
491 }
492 cmd_builder << " 2>&1 ";
493#elif defined(OS_MACOSX)
494 cmd_builder << getenv("GOOGLE_PPROF_BINARY_PATH") << " "
495 << DisplayTypeToPProfArgument(display_type)
496 << (show_ccount ? " -contentions " : "");
497 if (base_name) {
498 cmd_builder << "-base " << *base_name << ' ';
499 }
500 cmd_builder << prof_name << " 2>&1 ";
501#endif
502
503 const std::string cmd = cmd_builder.str();
504 for (int ntry = 0; ntry < 2; ++ntry) {
505 if (!g_written_pprof_perl) {
506 if (!WriteSmallFile(pprof_tool.c_str(), pprof_perl()) ||
507 !WriteSmallFile(flamegraph_tool.c_str(), flamegraph_perl())) {
508 os << "Fail to write " << pprof_tool
509 << (use_html ? "</body></html>" : "\n");
510 os.move_to(resp);
511 cntl->http_response().set_status_code(
512 HTTP_STATUS_INTERNAL_SERVER_ERROR);
513 return;
514 }
515 g_written_pprof_perl = true;
516 }
517 errno = 0; // read_command_output may not set errno, clear it to make sure if
518 // we see non-zero errno, it's real error.
519 butil::IOBufBuilder pprof_output;
520 const int rc = butil::read_command_output(pprof_output, cmd.c_str());
521 if (rc != 0) {
522 butil::FilePath pprof_path(pprof_tool);
523 if (!butil::PathExists(pprof_path)) {
524 // Write the script again.
525 g_written_pprof_perl = false;
526 // tell user.
527 os << pprof_path.value() << " was removed, recreate ...\n\n";
528 continue;
529 }
530 butil::FilePath flamegraph_path(flamegraph_tool);
531 if (!butil::PathExists(flamegraph_path)) {
532 // Write the script again.
533 g_written_pprof_perl = false;
534 // tell user.
535 os << flamegraph_path.value() << " was removed, recreate ...\n\n";
536 continue;
537 }
538 if (rc < 0) {
539 os << "Fail to execute `" << cmd << "', " << berror()
540 << (use_html ? "</body></html>" : "\n");
541 os.move_to(resp);
542 cntl->http_response().set_status_code(
543 HTTP_STATUS_INTERNAL_SERVER_ERROR);
544 return;
545 }
546 // cmd returns non zero, quit normally
547 }
548 pprof_output.move_to(prof_result);
549 // Cache result in file.
550 char result_name[256];
551 MakeCacheName(result_name, sizeof(result_name), prof_name,
552 GetBaseName(base_name), display_type, show_ccount);
553
554 // Append the profile name as the visual reminder for what
555 // current profile is.
556 butil::IOBuf before_label;
557 butil::IOBuf tmp;
558 if (cntl->http_request().uri().GetQuery("view") == NULL) {
559 tmp.append(prof_name);
560 tmp.append("[addToProfEnd]");
561 }
562 if (prof_result.cut_until(&before_label, ",label=\"") == 0) {
563 tmp.append(before_label);
564 tmp.append(",label=\"[");
565 tmp.append(GetBaseName(prof_name));
566 if (base_name) {
567 tmp.append(" - ");
568 tmp.append(GetBaseName(base_name));
569 }
570 tmp.append("]\\l");
571 tmp.append(prof_result);
572 tmp.swap(prof_result);
573 } else {
574 // Assume it's text. append before result directly.
575 tmp.append("[");
576 tmp.append(GetBaseName(prof_name));
577 if (base_name) {
578 tmp.append(" - ");
579 tmp.append(GetBaseName(base_name));
580 }
581 tmp.append("]\n");
582 tmp.append(prof_result);
583 tmp.swap(prof_result);
584 }
585
586 if (!WriteSmallFile(result_name, prof_result)) {
587 LOG(ERROR) << "Fail to write " << result_name;
588 CHECK(butil::DeleteFile(butil::FilePath(result_name), false));
589 }
590 break;
591 }
592 CHECK(!use_html);
593 // NOTE: not send prof_result to os first which does copying.
594 os.move_to(resp);
595 if (use_html) {
596 resp.append("<pre>");
597 }
598 resp.append(prof_result);
599 if (use_html) {
600 resp.append("</pre></body></html>");
601 }
602}
603
604static void DoProfiling(ProfilingType type,
605 ::google::protobuf::RpcController* cntl_base,
606 ::google::protobuf::Closure* done) {
607 ClosureGuard done_guard(done);
608 Controller *cntl = static_cast<Controller*>(cntl_base);
609 butil::IOBuf& resp = cntl->response_attachment();
610 const bool use_html = UseHTML(cntl->http_request());
611 cntl->http_response().set_content_type(use_html ? "text/html" : "text/plain");
612
613 butil::IOBufBuilder os;
614 if (use_html) {
615 os << "<!DOCTYPE html><html><head>\n"
616 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
617 "<script language=\"javascript\" type=\"text/javascript\" src=\"/js/jquery_min\"></script>\n"
618 << TabsHead()
619 << "<style type=\"text/css\">\n"
620 ".logo {position: fixed; bottom: 0px; right: 0px; }\n"
621 ".logo_text {color: #B0B0B0; }\n"
622 "</style>\n"
623 "</head>\n"
624 "<body>\n";
625 cntl->server()->PrintTabsBody(os, ProfilingType2String(type));
626 }
627
628 const std::string* view = cntl->http_request().uri().GetQuery("view");
629 if (view) {
630 if (!ValidProfilePath(*view)) {
631 return cntl->SetFailed(EINVAL, "Invalid query `view'");
632 }
633 if (!butil::PathExists(butil::FilePath(*view))) {
634 return cntl->SetFailed(
635 EINVAL, "The profile denoted by `view' does not exist");
636 }
637 DisplayResult(cntl, done_guard.release(), view->c_str(), os.buf());
638 return;
639 }
640
641 const int seconds = ReadSeconds(cntl);
642 if ((type == PROFILING_CPU || type == PROFILING_CONTENTION)) {
643 if (seconds < 0) {
644 os << "Invalid seconds" << (use_html ? "</body></html>" : "\n");
645 os.move_to(cntl->response_attachment());
646 cntl->http_response().set_status_code(HTTP_STATUS_BAD_REQUEST);
647 return;
648 }
649 }
650
651 // Log requester
652 std::ostringstream client_info;
653 client_info << cntl->remote_side();
654 if (cntl->auth_context()) {
655 client_info << "(auth=" << cntl->auth_context()->user() << ')';
656 } else {
657 client_info << "(no auth)";
658 }
659 client_info << " requests for profiling " << ProfilingType2String(type);
660 if (type == PROFILING_CPU || type == PROFILING_CONTENTION) {
661 LOG(INFO) << client_info.str() << " for " << seconds << " seconds";
662 } else {
663 LOG(INFO) << client_info.str();
664 }
665 int64_t prof_id = 0;
666 const std::string* prof_id_str =
667 cntl->http_request().uri().GetQuery("profiling_id");
668 if (prof_id_str != NULL) {
669 char* endptr = NULL;
670 prof_id = strtoll(prof_id_str->c_str(), &endptr, 10);
671 LOG_IF(ERROR, *endptr != '\0') << "Invalid profiling_id=" << prof_id;
672 }
673
674 {
675 BAIDU_SCOPED_LOCK(g_env[type].mutex);
676 if (g_env[type].client) {
677 if (NULL == g_env[type].waiters) {
678 g_env[type].waiters = new std::vector<ProfilingWaiter>;
679 }
680 ProfilingWaiter waiter = { cntl, done_guard.release() };
681 g_env[type].waiters->push_back(waiter);
682 RPC_VLOG << "Queue request from " << cntl->remote_side();
683 return;
684 }
685 if (g_env[type].cached_result != NULL &&
686 g_env[type].cached_result->id == prof_id) {
687 cntl->http_response().set_status_code(
688 g_env[type].cached_result->status_code);
689 cntl->response_attachment().append(
690 g_env[type].cached_result->result);
691 RPC_VLOG << "Hit cached result, id=" << prof_id;
692 return;
693 }
694 CHECK(NULL == g_env[type].client);
695 g_env[type].client = new ProfilingClient;
696 g_env[type].client->end_us = butil::cpuwide_time_us() + seconds * 1000000L;
697 g_env[type].client->seconds = seconds;
698 // This id work arounds an issue of chrome (or jquery under chrome) that
699 // the ajax call in another tab may be delayed until ajax call in
700 // current tab finishes. We assign a increasing-only id to each
701 // profiling and save last profiling result along with the assigned id.
702 // If the delay happens, the viewr should send the ajax call with an
703 // id matching the id in cached result, then the result will be returned
704 // directly instead of running another profiling which may take long
705 // time.
706 if (0 == ++ g_env[type].cur_id) { // skip 0
707 ++ g_env[type].cur_id;
708 }
709 g_env[type].client->id = g_env[type].cur_id;
710 g_env[type].client->point = cntl->remote_side();
711 }
712
713 RPC_VLOG << "Apply request from " << cntl->remote_side();
714
715 char prof_name[128];
716 if (MakeProfName(type, prof_name, sizeof(prof_name)) != 0) {
717 os << "Fail to create prof name: " << berror()
718 << (use_html ? "</body></html>" : "\n");
719 os.move_to(resp);
720 cntl->http_response().set_status_code(HTTP_STATUS_INTERNAL_SERVER_ERROR);
721 return NotifyWaiters(type, cntl, view);
722 }
723
724#if defined(OS_MACOSX)
725 if (!has_GOOGLE_PPROF_BINARY_PATH()) {
726 os << "no GOOGLE_PPROF_BINARY_PATH in env"
727 << (use_html ? "</body></html>" : "\n");
728 os.move_to(resp);
729 cntl->http_response().set_status_code(HTTP_STATUS_FORBIDDEN);
730 return NotifyWaiters(type, cntl, view);
731 }
732#endif
733 if (type == PROFILING_CPU) {
734 if ((void*)ProfilerStart == NULL || (void*)ProfilerStop == NULL) {
735 os << "CPU profiler is not enabled"
736 << (use_html ? "</body></html>" : "\n");
737 os.move_to(resp);
738 cntl->http_response().set_status_code(HTTP_STATUS_FORBIDDEN);
739 return NotifyWaiters(type, cntl, view);
740 }
741 butil::File::Error error;
742 const butil::FilePath dir = butil::FilePath(prof_name).DirName();
743 if (!butil::CreateDirectoryAndGetError(dir, &error)) {
744 os << "Fail to create directory=`" << dir.value() << ", "
745 << error << (use_html ? "</body></html>" : "\n");
746 os.move_to(resp);
747 cntl->http_response().set_status_code(
748 HTTP_STATUS_INTERNAL_SERVER_ERROR);
749 return NotifyWaiters(type, cntl, view);
750 }
751 if (!ProfilerStart(prof_name)) {
752 os << "Another profiler (not via /hotspots/cpu) is running, "
753 "try again later" << (use_html ? "</body></html>" : "\n");
754 os.move_to(resp);
755 cntl->http_response().set_status_code(HTTP_STATUS_SERVICE_UNAVAILABLE);
756 return NotifyWaiters(type, cntl, view);
757 }
758 if (bthread_usleep(seconds * 1000000L) != 0) {
759 PLOG(WARNING) << "Profiling has been interrupted";
760 }
761 ProfilerStop();
762 } else if (type == PROFILING_CONTENTION) {
763 if (!bthread::ContentionProfilerStart(prof_name)) {
764 os << "Another profiler (not via /hotspots/contention) is running, "
765 "try again later" << (use_html ? "</body></html>" : "\n");
766 os.move_to(resp);
767 cntl->http_response().set_status_code(HTTP_STATUS_SERVICE_UNAVAILABLE);
768 return NotifyWaiters(type, cntl, view);
769 }
770 if (bthread_usleep(seconds * 1000000L) != 0) {
771 PLOG(WARNING) << "Profiling has been interrupted";
772 }
773 bthread::ContentionProfilerStop();
774 } else if (type == PROFILING_HEAP) {
775 MallocExtension* malloc_ext = MallocExtension::instance();
776 if (malloc_ext == NULL || !has_TCMALLOC_SAMPLE_PARAMETER()) {
777 os << "Heap profiler is not enabled";
778 if (malloc_ext != NULL) {
779 os << " (no TCMALLOC_SAMPLE_PARAMETER in env)";
780 }
781 os << '.' << (use_html ? "</body></html>" : "\n");
782 os.move_to(resp);
783 cntl->http_response().set_status_code(HTTP_STATUS_FORBIDDEN);
784 return NotifyWaiters(type, cntl, view);
785 }
786 std::string obj;
787 malloc_ext->GetHeapSample(&obj);
788 if (!WriteSmallFile(prof_name, obj)) {
789 os << "Fail to write " << prof_name
790 << (use_html ? "</body></html>" : "\n");
791 os.move_to(resp);
792 cntl->http_response().set_status_code(
793 HTTP_STATUS_INTERNAL_SERVER_ERROR);
794 return NotifyWaiters(type, cntl, view);
795 }
796 } else if (type == PROFILING_GROWTH) {
797 MallocExtension* malloc_ext = MallocExtension::instance();
798 if (malloc_ext == NULL) {
799 os << "Growth profiler is not enabled."
800 << (use_html ? "</body></html>" : "\n");
801 os.move_to(resp);
802 cntl->http_response().set_status_code(HTTP_STATUS_FORBIDDEN);
803 return NotifyWaiters(type, cntl, view);
804 }
805 std::string obj;
806 malloc_ext->GetHeapGrowthStacks(&obj);
807 if (!WriteSmallFile(prof_name, obj)) {
808 os << "Fail to write " << prof_name
809 << (use_html ? "</body></html>" : "\n");
810 os.move_to(resp);
811 cntl->http_response().set_status_code(
812 HTTP_STATUS_INTERNAL_SERVER_ERROR);
813 return NotifyWaiters(type, cntl, view);
814 }
815 } else {
816 os << "Unknown ProfilingType=" << type
817 << (use_html ? "</body></html>" : "\n");
818 os.move_to(resp);
819 cntl->http_response().set_status_code(
820 HTTP_STATUS_INTERNAL_SERVER_ERROR);
821 return NotifyWaiters(type, cntl, view);
822 }
823
824 std::vector<ProfilingWaiter> waiters;
825 // NOTE: Must be called before DisplayResult which calls done->Run() and
826 // deletes cntl.
827 ConsumeWaiters(type, cntl, &waiters);
828 DisplayResult(cntl, done_guard.release(), prof_name, os.buf());
829
830 for (size_t i = 0; i < waiters.size(); ++i) {
831 DisplayResult(waiters[i].cntl, waiters[i].done, prof_name, os.buf());
832 }
833}
834
835static void StartProfiling(ProfilingType type,
836 ::google::protobuf::RpcController* cntl_base,
837 ::google::protobuf::Closure* done) {
838 ClosureGuard done_guard(done);
839 Controller *cntl = static_cast<Controller*>(cntl_base);
840 butil::IOBuf& resp = cntl->response_attachment();
841 const bool use_html = UseHTML(cntl->http_request());
842 butil::IOBufBuilder os;
843 bool enabled = false;
844 const char* extra_desc = "";
845 if (type == PROFILING_CPU) {
846 enabled = cpu_profiler_enabled;
847 } else if (type == PROFILING_CONTENTION) {
848 enabled = true;
849 } else if (type == PROFILING_HEAP) {
850 enabled = IsHeapProfilerEnabled();
851 if (enabled && !has_TCMALLOC_SAMPLE_PARAMETER()) {
852 enabled = false;
853 extra_desc = " (no TCMALLOC_SAMPLE_PARAMETER in env)";
854 }
855 } else if (type == PROFILING_GROWTH) {
856 enabled = IsHeapProfilerEnabled();
857 }
858 const char* const type_str = ProfilingType2String(type);
859
860#if defined(OS_MACOSX)
861 if (!has_GOOGLE_PPROF_BINARY_PATH()) {
862 enabled = false;
863 extra_desc = "(no GOOGLE_PPROF_BINARY_PATH in env)";
864 }
865#endif
866
867 if (!use_html) {
868 if (!enabled) {
869 os << "Error: " << type_str << " profiler is not enabled."
870 << extra_desc << "\n"
871 "Read the docs: docs/cn/{cpu_profiler.md,heap_profiler.md}\n";
872 os.move_to(cntl->response_attachment());
873 cntl->http_response().set_status_code(HTTP_STATUS_FORBIDDEN);
874 return;
875 }
876 // Console can only use non-responsive version, namely the curl
877 // blocks until profiling is done.
878 return DoProfiling(type, cntl, done_guard.release());
879 }
880
881 const int seconds = ReadSeconds(cntl);
882 const std::string* view = cntl->http_request().uri().GetQuery("view");
883 const bool show_ccount = cntl->http_request().uri().GetQuery("ccount");
884 const std::string* base_name = cntl->http_request().uri().GetQuery("base");
885 const std::string* display_type_query = cntl->http_request().uri().GetQuery("display_type");
886 DisplayType display_type = DisplayType::kDot;
887 if (display_type_query) {
888 display_type = StringToDisplayType(*display_type_query);
889 if (display_type == DisplayType::kUnknown) {
890 return cntl->SetFailed(EINVAL, "Invalid display_type=%s", display_type_query->c_str());
891 }
892 }
893
894 ProfilingClient profiling_client;
895 size_t nwaiters = 0;
896 ProfilingEnvironment & env = g_env[type];
897 if (view == NULL) {
898 BAIDU_SCOPED_LOCK(env.mutex);
899 if (env.client) {
900 profiling_client = *env.client;
901 nwaiters = (env.waiters ? env.waiters->size() : 0);
902 }
903 }
904
905 cntl->http_response().set_content_type("text/html");
906 os << "<!DOCTYPE html><html><head>\n"
907 "<script language=\"javascript\" type=\"text/javascript\""
908 " src=\"/js/jquery_min\"></script>\n"
909 << TabsHead()
910 << "<style type=\"text/css\">\n"
911 ".logo {position: fixed; bottom: 0px; right: 0px; }\n"
912 ".logo_text {color: #B0B0B0; }\n"
913 " </style>\n"
914 "<script type=\"text/javascript\">\n"
915 "function generateURL() {\n"
916 " var past_prof = document.getElementById('view_prof').value;\n"
917 " var base_prof = document.getElementById('base_prof').value;\n"
918 " var display_type = document.getElementById('display_type').value;\n";
919 if (type == PROFILING_CONTENTION) {
920 os << " var show_ccount = document.getElementById('ccount_cb').checked;\n";
921 }
922 os << " var targetURL = '/hotspots/" << type_str << "';\n"
923 " targetURL += '?display_type=' + display_type;\n"
924 " if (past_prof != '') {\n"
925 " targetURL += '&view=' + past_prof;\n"
926 " }\n"
927 " if (base_prof != '') {\n"
928 " targetURL += '&base=' + base_prof;\n"
929 " }\n";
930 if (type == PROFILING_CONTENTION) {
931 os <<
932 " if (show_ccount) {\n"
933 " targetURL += '&ccount';\n"
934 " }\n";
935 }
936 os << " return targetURL;\n"
937 "}\n"
938 "$(function() {\n"
939 " function onDataReceived(data) {\n";
940 if (view == NULL) {
941 os <<
942 " var selEnd = data.indexOf('[addToProfEnd]');\n"
943 " if (selEnd != -1) {\n"
944 " var sel = document.getElementById('view_prof');\n"
945 " var option = document.createElement('option');\n"
946 " option.value = data.substring(0, selEnd);\n"
947 " option.text = option.value;\n"
948 " var slash_index = option.value.lastIndexOf('/');\n"
949 " if (slash_index != -1) {\n"
950 " option.text = option.value.substring(slash_index + 1);\n"
951 " }\n"
952 " var option1 = sel.options[1];\n"
953 " if (option1 == null || option1.text != option.text) {\n"
954 " sel.add(option, 1);\n"
955 " } else if (option1 != null) {\n"
956 " console.log('merged ' + option.text);\n"
957 " }\n"
958 " sel.selectedIndex = 1;\n"
959 " window.history.pushState('', '', generateURL());\n"
960 " data = data.substring(selEnd + '[addToProfEnd]'.length);\n"
961 " }\n";
962 }
963 os <<
964 " var index = data.indexOf('digraph ');\n"
965 " if (index == -1) {\n"
966 " var selEnd = data.indexOf('[addToProfEnd]');\n"
967 " if (selEnd != -1) {\n"
968 " data = data.substring(selEnd + '[addToProfEnd]'.length);\n"
969 " }\n"
970 " $(\"#profiling-result\").html('<pre>' + data + '</pre>');\n"
971 " if (data.indexOf('FlameGraph') != -1) { init(); }"
972 " } else {\n"
973 " $(\"#profiling-result\").html('Plotting ...');\n"
974 " var svg = Viz(data.substring(index), \"svg\");\n"
975 " $(\"#profiling-result\").html(svg);\n"
976 " }\n"
977 " }\n"
978 " function onErrorReceived(xhr, ajaxOptions, thrownError) {\n"
979 " $(\"#profiling-result\").html(xhr.responseText);\n"
980 " }\n"
981 " $.ajax({\n"
982 " url: \"/hotspots/" << type_str << "_non_responsive?console=1";
983 if (type == PROFILING_CPU || type == PROFILING_CONTENTION) {
984 os << "&seconds=" << seconds;
985 }
986 if (profiling_client.id != 0) {
987 os << "&profiling_id=" << profiling_client.id;
988 }
989 os << "&display_type=" << DisplayTypeToString(display_type);
990 if (show_ccount) {
991 os << "&ccount";
992 }
993 if (view) {
994 os << "&view=" << *view;
995 }
996 if (base_name) {
997 os << "&base=" << *base_name;
998 }
999 os << "\",\n"
1000 " type: \"GET\",\n"
1001 " dataType: \"html\",\n"
1002 " success: onDataReceived,\n"
1003 " error: onErrorReceived\n"
1004 " });\n"
1005 "});\n"
1006 "function onSelectProf() {\n"
1007 " window.location.href = generateURL();\n"
1008 "}\n"
1009 "function onChangedCB(cb) {\n"
1010 " onSelectProf();\n"
1011 "}\n"
1012 "</script>\n"
1013 "</head>\n"
1014 "<body>\n";
1015 cntl->server()->PrintTabsBody(os, type_str);
1016
1017 TRACEPRINTF("Begin to enumerate profiles");
1018 std::vector<std::string> past_profs;
1019 butil::FilePath prof_dir(FLAGS_rpc_profiling_dir);
1020 prof_dir = prof_dir.Append(GetProgramChecksum());
1021 std::string file_pattern;
1022 file_pattern.reserve(15);
1023 file_pattern.append("*.");
1024 file_pattern.append(type_str);
1025 butil::FileEnumerator prof_enum(prof_dir, false/*non recursive*/,
1026 butil::FileEnumerator::FILES,
1027 file_pattern);
1028 std::string file_path;
1029 for (butil::FilePath name = prof_enum.Next(); !name.empty();
1030 name = prof_enum.Next()) {
1031 // NOTE: name already includes dir.
1032 if (past_profs.empty()) {
1033 past_profs.reserve(16);
1034 }
1035 past_profs.push_back(name.value());
1036 }
1037 if (!past_profs.empty()) {
1038 TRACEPRINTF("Sort %lu profiles in decending order", past_profs.size());
1039 std::sort(past_profs.begin(), past_profs.end(), std::greater<std::string>());
1040 int max_profiles = FLAGS_max_profiles_kept/*may be reloaded*/;
1041 if (max_profiles < 0) {
1042 max_profiles = 0;
1043 }
1044 if (past_profs.size() > (size_t)max_profiles) {
1045 TRACEPRINTF("Remove %lu profiles",
1046 past_profs.size() - (size_t)max_profiles);
1047 for (size_t i = max_profiles; i < past_profs.size(); ++i) {
1048 CHECK(butil::DeleteFile(butil::FilePath(past_profs[i]), false));
1049 std::string cache_path;
1050 cache_path.reserve(past_profs[i].size() + 7);
1051 cache_path += past_profs[i];
1052 cache_path += ".cache";
1053 CHECK(butil::DeleteFile(butil::FilePath(cache_path), true));
1054 }
1055 past_profs.resize(max_profiles);
1056 }
1057 }
1058 TRACEPRINTF("End enumeration");
1059
1060 os << "<pre style='display:inline'>View: </pre>"
1061 "<select id='view_prof' onchange='onSelectProf()'>";
1062 os << "<option value=''>&lt;new profile&gt;</option>";
1063 for (size_t i = 0; i < past_profs.size(); ++i) {
1064 os << "<option value='" << past_profs[i] << "' ";
1065 if (view != NULL && past_profs[i] == *view) {
1066 os << "selected";
1067 }
1068 os << '>' << GetBaseName(&past_profs[i]);
1069 }
1070 os << "</select>";
1071 os << "<div><pre style='display:inline'>Display: </pre>"
1072 "<select id='display_type' onchange='onSelectProf()'>"
1073 "<option value=dot" << (display_type == DisplayType::kDot ? " selected" : "") << ">dot</option>"
1074#if defined(OS_LINUX)
1075 "<option value=flame" << (display_type == DisplayType::kFlameGraph ? " selected" : "") << ">flame</option>"
1076#endif
1077 "<option value=text" << (display_type == DisplayType::kText ? " selected" : "") << ">text</option></select>";
1078 if (type == PROFILING_CONTENTION) {
1079 os << "&nbsp;&nbsp;&nbsp;<label for='ccount_cb'>"
1080 "<input id='ccount_cb' type='checkbox'"
1081 << (show_ccount ? " checked=''" : "") <<
1082 " onclick='onChangedCB(this);'>count</label>";
1083 }
1084 os << "</div><div><pre style='display:inline'>Diff: </pre>"
1085 "<select id='base_prof' onchange='onSelectProf()'>"
1086 "<option value=''>&lt;none&gt;</option>";
1087 for (size_t i = 0; i < past_profs.size(); ++i) {
1088 os << "<option value='" << past_profs[i] << "' ";
1089 if (base_name != NULL && past_profs[i] == *base_name) {
1090 os << "selected";
1091 }
1092 os << '>' << GetBaseName(&past_profs[i]);
1093 }
1094 os << "</select></div>";
1095
1096 if (!enabled && view == NULL) {
1097 os << "<p><span style='color:red'>Error:</span> "
1098 << type_str << " profiler is not enabled." << extra_desc << "</p>"
1099 "<p>To enable all profilers, link tcmalloc and define macros BRPC_ENABLE_CPU_PROFILER"
1100 "</p><p>Or read docs: <a href='https://github.com/brpc/brpc/blob/master/docs/cn/cpu_profiler.md'>cpu_profiler</a>"
1101 " and <a href='https://github.com/brpc/brpc/blob/master/docs/cn/heap_profiler.md'>heap_profiler</a>"
1102 "</p></body></html>";
1103 os.move_to(cntl->response_attachment());
1104 cntl->http_response().set_status_code(HTTP_STATUS_FORBIDDEN);
1105 return;
1106 }
1107
1108 if ((type == PROFILING_CPU || type == PROFILING_CONTENTION) && view == NULL) {
1109 if (seconds < 0) {
1110 os << "Invalid seconds</body></html>";
1111 os.move_to(cntl->response_attachment());
1112 cntl->http_response().set_status_code(HTTP_STATUS_BAD_REQUEST);
1113 return;
1114 }
1115 }
1116
1117 if (nwaiters >= CONCURRENT_PROFILING_LIMIT) {
1118 os << "Your profiling request is rejected because of "
1119 "too many concurrent profiling requests</body></html>";
1120 os.move_to(cntl->response_attachment());
1121 cntl->http_response().set_status_code(HTTP_STATUS_SERVICE_UNAVAILABLE);
1122 return;
1123 }
1124
1125 os << "<div id=\"profiling-result\">";
1126 if (profiling_client.seconds != 0) {
1127 const int wait_seconds =
1128 (int)ceil((profiling_client.end_us - butil::cpuwide_time_us())
1129 / 1000000.0);
1130 os << "Your request is merged with the request from "
1131 << profiling_client.point;
1132 if (type == PROFILING_CPU || type == PROFILING_CONTENTION) {
1133 os << ", showing in about " << wait_seconds << " seconds ...";
1134 }
1135 } else {
1136 if ((type == PROFILING_CPU || type == PROFILING_CONTENTION) && view == NULL) {
1137 os << "Profiling " << ProfilingType2String(type) << " for "
1138 << seconds << " seconds ...";
1139 } else {
1140 os << "Generating " << type_str << " profile ...";
1141 }
1142 }
1143 os << "</div><pre class='logo'><span class='logo_text'>" << logo()
1144 << "</span></pre></body>\n";
1145 if (display_type == DisplayType::kDot) {
1146 // don't need viz.js in text mode.
1147 os << "<script language=\"javascript\" type=\"text/javascript\""
1148 " src=\"/js/viz_min\"></script>\n";
1149 }
1150 os << "</html>";
1151 os.move_to(resp);
1152}
1153
1154void HotspotsService::cpu(
1155 ::google::protobuf::RpcController* cntl_base,
1156 const ::brpc::HotspotsRequest*,
1157 ::brpc::HotspotsResponse*,
1158 ::google::protobuf::Closure* done) {
1159 return StartProfiling(PROFILING_CPU, cntl_base, done);
1160}
1161
1162void HotspotsService::heap(
1163 ::google::protobuf::RpcController* cntl_base,
1164 const ::brpc::HotspotsRequest*,
1165 ::brpc::HotspotsResponse*,
1166 ::google::protobuf::Closure* done) {
1167 return StartProfiling(PROFILING_HEAP, cntl_base, done);
1168}
1169
1170void HotspotsService::growth(
1171 ::google::protobuf::RpcController* cntl_base,
1172 const ::brpc::HotspotsRequest*,
1173 ::brpc::HotspotsResponse*,
1174 ::google::protobuf::Closure* done) {
1175 return StartProfiling(PROFILING_GROWTH, cntl_base, done);
1176}
1177
1178void HotspotsService::contention(
1179 ::google::protobuf::RpcController* cntl_base,
1180 const ::brpc::HotspotsRequest*,
1181 ::brpc::HotspotsResponse*,
1182 ::google::protobuf::Closure* done) {
1183 return StartProfiling(PROFILING_CONTENTION, cntl_base, done);
1184}
1185
1186void HotspotsService::cpu_non_responsive(
1187 ::google::protobuf::RpcController* cntl_base,
1188 const ::brpc::HotspotsRequest*,
1189 ::brpc::HotspotsResponse*,
1190 ::google::protobuf::Closure* done) {
1191 return DoProfiling(PROFILING_CPU, cntl_base, done);
1192}
1193
1194void HotspotsService::heap_non_responsive(
1195 ::google::protobuf::RpcController* cntl_base,
1196 const ::brpc::HotspotsRequest*,
1197 ::brpc::HotspotsResponse*,
1198 ::google::protobuf::Closure* done) {
1199 return DoProfiling(PROFILING_HEAP, cntl_base, done);
1200}
1201
1202void HotspotsService::growth_non_responsive(
1203 ::google::protobuf::RpcController* cntl_base,
1204 const ::brpc::HotspotsRequest*,
1205 ::brpc::HotspotsResponse*,
1206 ::google::protobuf::Closure* done) {
1207 return DoProfiling(PROFILING_GROWTH, cntl_base, done);
1208}
1209
1210void HotspotsService::contention_non_responsive(
1211 ::google::protobuf::RpcController* cntl_base,
1212 const ::brpc::HotspotsRequest*,
1213 ::brpc::HotspotsResponse*,
1214 ::google::protobuf::Closure* done) {
1215 return DoProfiling(PROFILING_CONTENTION, cntl_base, done);
1216}
1217
1218void HotspotsService::GetTabInfo(TabInfoList* info_list) const {
1219 TabInfo* info = info_list->add();
1220 info->path = "/hotspots/cpu";
1221 info->tab_name = "cpu";
1222 info = info_list->add();
1223 info->path = "/hotspots/heap";
1224 info->tab_name = "heap";
1225 info = info_list->add();
1226 info->path = "/hotspots/growth";
1227 info->tab_name = "growth";
1228 info = info_list->add();
1229 info->path = "/hotspots/contention";
1230 info->tab_name = "contention";
1231}
1232
1233} // namespace brpc
1234