// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// instrumentation.h: contains the definitions needed to
// instrument code for profiling:
// ScopedProfilingLabel, RegisterCurrentThreadForProfiling.
//
// profiler.h is only needed to drive the profiler:
// StartProfiling, FinishProfiling.
//
// See the usage example in profiler.h.
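//
// A minimal usage sketch (the function names below are placeholders, not
// part of gemmlowp; profiler.h remains the authoritative example):
//
//   void SomeInstrumentedFunction() {
//     // Samples taken while 'label' is in scope are attributed to it.
//     gemmlowp::ScopedProfilingLabel label("SomeInstrumentedFunction");
//     // ... work to be profiled ...
//   }
//
//   void WorkerThreadEntryPoint() {
//     // Opt this thread into profiling before doing any instrumented work.
//     gemmlowp::RegisterCurrentThreadForProfiling();
//     SomeInstrumentedFunction();
//   }
//
// StartProfiling() and FinishProfiling() from profiler.h are then called
// around the code to be profiled to drive sample collection and reporting.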

#ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_
#define GEMMLOWP_PROFILING_INSTRUMENTATION_H_

#include <cstdio>

#ifndef GEMMLOWP_USE_STLPORT
#include <cstdint>
#else
#include <stdint.h>
namespace std {
using ::int16_t;
using ::int32_t;
using ::int8_t;
using ::size_t;
using ::uint16_t;
using ::uint32_t;
using ::uint8_t;
using ::uintptr_t;
}  // namespace std
#endif

#include <algorithm>
#include <cassert>
#include <cstdlib>

#ifdef GEMMLOWP_PROFILING
#include <cstring>
#include <set>
#endif

#include "./pthread_everywhere.h"

namespace gemmlowp {

inline void ReleaseBuildAssertion(bool condition, const char* msg) {
  if (!condition) {
    fprintf(stderr, "gemmlowp error: %s\n", msg);
    abort();
  }
}

class Mutex {
 public:
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;

  Mutex() { pthread_mutex_init(&m, NULL); }
  ~Mutex() { pthread_mutex_destroy(&m); }

  void Lock() { pthread_mutex_lock(&m); }
  void Unlock() { pthread_mutex_unlock(&m); }

 private:
  pthread_mutex_t m;
};

class GlobalMutexes {
 public:
  static Mutex* Profiler() {
    static Mutex m;
    return &m;
  }

  static Mutex* EightBitIntGemm() {
    static Mutex m;
    return &m;
  }
};

// A very simple RAII helper to lock and unlock a Mutex
struct ScopedLock {
  ScopedLock(Mutex* m) : _m(m) { _m->Lock(); }
  ~ScopedLock() { _m->Unlock(); }

 private:
  Mutex* _m;
};

// Profiling definitions. Two paths: when profiling is enabled,
// and when profiling is disabled.
#ifdef GEMMLOWP_PROFILING
// This code path is when profiling is enabled.

// A pseudo call stack. Unlike a real call stack, this only
// contains pointers to literal strings that were manually entered
// in the instrumented code (see ScopedProfilingLabel).
struct ProfilingStack {
  static const std::size_t kMaxSize = 30;
  typedef const char* LabelsArrayType[kMaxSize];
  LabelsArrayType labels;
  std::size_t size;
  Mutex* lock;

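  // Zero-initialize all members. In particular, 'lock' starts out null;
  // it is allocated by the ThreadInfo that owns this stack (see below).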
  ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }
  ~ProfilingStack() { delete lock; }

  void Push(const char* label) {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
    labels[size] = label;
    size++;
  }

  void Pop() {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
    size--;
  }

  void UpdateTop(const char* new_label) {
    ScopedLock sl(lock);
    assert(size);
    labels[size - 1] = new_label;
  }

  ProfilingStack& operator=(const ProfilingStack& other) {
    memcpy(this, &other, sizeof(ProfilingStack));
    return *this;
  }

  bool operator==(const ProfilingStack& other) const {
    return !memcmp(this, &other, sizeof(ProfilingStack));
  }
};

static_assert(
    !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
    "ProfilingStack should have power-of-two size to fit in cache lines");

struct ThreadInfo;

// The global set of threads being profiled.
inline std::set<ThreadInfo*>& ThreadsUnderProfiling() {
  static std::set<ThreadInfo*> v;
  return v;
}

struct ThreadInfo {
  pthread_key_t key;  // used only to get a callback at thread exit.
  ProfilingStack stack;

  ThreadInfo() {
    pthread_key_create(&key, ThreadExitCallback);
    pthread_setspecific(key, this);
    stack.lock = new Mutex();
  }

  static void ThreadExitCallback(void* ptr) {
    ScopedLock sl(GlobalMutexes::Profiler());
    ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
    ThreadsUnderProfiling().erase(self);
  }
};

inline ThreadInfo& ThreadLocalThreadInfo() {
  static pthread_key_t key;
  static auto DeleteThreadInfo = [](void* threadInfoPtr) {
    ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr);
    if (threadInfo) {
      delete threadInfo;
    }
  };

  // key_result is unused. The purpose of this 'static' local object is
  // to have its initializer (the pthread_key_create call) performed exactly
  // once, in a way that is guaranteed (since C++11) to be reentrant.
  static const int key_result = pthread_key_create(&key, DeleteThreadInfo);
  (void)key_result;

  ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key));
  if (!threadInfo) {
    threadInfo = new ThreadInfo();
    pthread_setspecific(key, threadInfo);
  }
  return *threadInfo;
}

// ScopedProfilingLabel is how one instruments code for profiling
// with this profiler. Construct local ScopedProfilingLabel variables,
// passing a literal string describing the local code. Profile
// samples will then be annotated with this label while it is in scope
// (hence the "Scoped" in the name; this is the RAII idiom).
// See the example in profiler.h.
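//
// For example (a sketch; FooKernel is a placeholder name, not part of
// gemmlowp):
//
//   void FooKernel(bool use_fast_path) {
//     ScopedProfilingLabel label("FooKernel");
//     if (use_fast_path) {
//       // Update() retags the innermost label without pushing a new one.
//       label.Update("FooKernel, fast path");
//     }
//     // ... work attributed to the current label ...
//   }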
class ScopedProfilingLabel {
  ProfilingStack* profiling_stack_;

 public:
  explicit ScopedProfilingLabel(const char* label)
      : profiling_stack_(&ThreadLocalThreadInfo().stack) {
    profiling_stack_->Push(label);
  }

  ~ScopedProfilingLabel() { profiling_stack_->Pop(); }

  void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); }
};

// To be called once on each thread to be profiled.
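// Typically called at the start of a worker thread's entry point, before any
// instrumented work is done on that thread (see the sketch near the top of
// this file).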
inline void RegisterCurrentThreadForProfiling() {
  ScopedLock sl(GlobalMutexes::Profiler());
  ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
}

#else  // not GEMMLOWP_PROFILING
// This code path is when profiling is disabled.

// This empty definition of ScopedProfilingLabel ensures that
// it has zero runtime overhead when profiling is disabled.
struct ScopedProfilingLabel {
  explicit ScopedProfilingLabel(const char*) {}
  void Update(const char*) {}
};

inline void RegisterCurrentThreadForProfiling() {}

#endif

}  // end namespace gemmlowp

#endif  // GEMMLOWP_PROFILING_INSTRUMENTATION_H_