1 | // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | // instrumentation.h: contains the definitions needed to |
16 | // instrument code for profiling: |
17 | // ScopedProfilingLabel, RegisterCurrentThreadForProfiling. |
18 | // |
19 | // profiler.h is only needed to drive the profiler: |
20 | // StartProfiling, FinishProfiling. |
21 | // |
22 | // See the usage example in profiler.h. |
23 | |
24 | #ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_ |
25 | #define GEMMLOWP_PROFILING_INSTRUMENTATION_H_ |
26 | |
27 | #include <cstdio> |
28 | |
29 | #ifndef GEMMLOWP_USE_STLPORT |
30 | #include <cstdint> |
31 | #else |
32 | #include <stdint.h> |
33 | namespace std { |
34 | using ::int16_t; |
35 | using ::int32_t; |
36 | using ::int8_t; |
37 | using ::size_t; |
38 | using ::uint16_t; |
39 | using ::uint32_t; |
40 | using ::uint8_t; |
41 | using ::uintptr_t; |
42 | } // namespace std |
43 | #endif |
44 | |
45 | #include <algorithm> |
46 | #include <cassert> |
47 | #include <cstdlib> |
48 | |
49 | #ifdef GEMMLOWP_PROFILING |
50 | #include <cstring> |
51 | #include <set> |
52 | #endif |
53 | |
54 | #include "./pthread_everywhere.h" |
55 | |
56 | namespace gemmlowp { |
57 | |
// Aborts the process with `msg` printed to stderr when `condition` is
// false. Unlike assert(), this check also fires in release builds.
inline void ReleaseBuildAssertion(bool condition, const char* msg) {
  if (condition) {
    return;
  }
  fprintf(stderr, "gemmlowp error: %s\n", msg);
  abort();
}
64 | |
// Minimal RAII wrapper around a pthread mutex. Non-copyable, since
// copying a pthread_mutex_t is undefined. Not recursive: a thread must
// not Lock() a Mutex it already holds.
class Mutex {
 public:
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;

  // nullptr attributes: default (non-recursive, non-robust) mutex.
  Mutex() { pthread_mutex_init(&m, nullptr); }
  ~Mutex() { pthread_mutex_destroy(&m); }

  void Lock() { pthread_mutex_lock(&m); }
  void Unlock() { pthread_mutex_unlock(&m); }

 private:
  pthread_mutex_t m;
};
79 | |
80 | class GlobalMutexes { |
81 | public: |
82 | static Mutex* Profiler() { |
83 | static Mutex m; |
84 | return &m; |
85 | } |
86 | |
87 | static Mutex* EightBitIntGemm() { |
88 | static Mutex m; |
89 | return &m; |
90 | } |
91 | }; |
92 | |
93 | // A very simple RAII helper to lock and unlock a Mutex |
94 | struct ScopedLock { |
95 | ScopedLock(Mutex* m) : _m(m) { _m->Lock(); } |
96 | ~ScopedLock() { _m->Unlock(); } |
97 | |
98 | private: |
99 | Mutex* _m; |
100 | }; |
101 | |
102 | // Profiling definitions. Two paths: when profiling is enabled, |
103 | // and when profiling is disabled. |
104 | #ifdef GEMMLOWP_PROFILING |
105 | // This code path is when profiling is enabled. |
106 | |
// A pseudo-call-stack. Contrary to a real call-stack, this only
// contains pointers to literal strings that were manually entered
// in the instrumented code (see ScopedProfilingLabel). All member
// functions serialize on `lock`, presumably because another thread
// (the profiler, see profiler.h) reads the stack concurrently.
struct ProfilingStack {
  static const std::size_t kMaxSize = 30;
  typedef const char* LabelsArrayType[kMaxSize];
  LabelsArrayType labels;  // label slots; only the first `size` are valid.
  std::size_t size;        // current stack depth.
  Mutex* lock;             // owned: deleted by the destructor below.

  // Zero-initializes every member in one go. OK because all members
  // are trivial (an array of pointers, an integer, a pointer).
  ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }
  ~ProfilingStack() { delete lock; }

  // Pushes a label; aborts (even in release builds) on overflow.
  void Push(const char* label) {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow" );
    labels[size] = label;
    size++;
  }

  // Pops the top label; aborts (even in release builds) on underflow.
  void Pop() {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size > 0, "ProfilingStack underflow" );
    size--;
  }

  // Replaces the top-of-stack label in place. Requires a non-empty
  // stack (debug-only assert).
  void UpdateTop(const char* new_label) {
    ScopedLock sl(lock);
    assert(size);
    labels[size - 1] = new_label;
  }

  // Bitwise copy. NOTE(review): this also copies the `lock` pointer,
  // so the destination aliases the source's Mutex and its destructor
  // would delete it — presumably copies are snapshots whose lifetime
  // is managed by profiler.h; confirm against the caller.
  ProfilingStack& operator=(const ProfilingStack& other) {
    memcpy(this, &other, sizeof(ProfilingStack));
    return *this;
  }

  // Bitwise comparison: labels compare by pointer identity, which is
  // sufficient since they are pointers to literal strings.
  bool operator==(const ProfilingStack& other) const {
    return !memcmp(this, &other, sizeof(ProfilingStack));
  }
};

// Keeping the size a power of two makes the bitwise copy/compare above
// cheap and cache-line friendly.
static_assert(
    !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
    "ProfilingStack should have power-of-two size to fit in cache lines" );
152 | |
153 | struct ThreadInfo; |
154 | |
155 | // The global set of threads being profiled. |
156 | inline std::set<ThreadInfo*>& ThreadsUnderProfiling() { |
157 | static std::set<ThreadInfo*> v; |
158 | return v; |
159 | } |
160 | |
// Per-thread profiling state: the thread's pseudo-call-stack, plus a
// pthreads key whose only purpose is to trigger ThreadExitCallback
// when the owning thread exits.
struct ThreadInfo {
  pthread_key_t key;  // used only to get a callback at thread exit.
  ProfilingStack stack;

  ThreadInfo() {
    // Arrange for ThreadExitCallback(this) to run at thread exit.
    pthread_key_create(&key, ThreadExitCallback);
    pthread_setspecific(key, this);
    // The stack's mutex is owned by the stack (deleted in its dtor).
    stack.lock = new Mutex();
  }

  // Removes the exiting thread from the set of profiled threads.
  // Note: this only deregisters; the ThreadInfo itself is deleted by
  // the key destructor installed in ThreadLocalThreadInfo().
  static void ThreadExitCallback(void* ptr) {
    ScopedLock sl(GlobalMutexes::Profiler());
    ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
    ThreadsUnderProfiling().erase(self);
  }
};
177 | |
178 | inline ThreadInfo& ThreadLocalThreadInfo() { |
179 | static pthread_key_t key; |
180 | static auto DeleteThreadInfo = [](void* threadInfoPtr) { |
181 | ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr); |
182 | if (threadInfo) { |
183 | delete threadInfo; |
184 | } |
185 | }; |
186 | |
187 | // key_result is unused. The purpose of this 'static' local object is |
188 | // to have its initializer (the pthread_key_create call) performed exactly |
189 | // once, in a way that is guaranteed (since C++11) to be reentrant. |
190 | static const int key_result = pthread_key_create(&key, DeleteThreadInfo); |
191 | (void)key_result; |
192 | |
193 | ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key)); |
194 | if (!threadInfo) { |
195 | threadInfo = new ThreadInfo(); |
196 | pthread_setspecific(key, threadInfo); |
197 | } |
198 | return *threadInfo; |
199 | } |
200 | |
201 | // ScopedProfilingLabel is how one instruments code for profiling |
202 | // with this profiler. Construct local ScopedProfilingLabel variables, |
203 | // passing a literal string describing the local code. Profile |
204 | // samples will then be annotated with this label, while it is in scope |
205 | // (whence the name --- also known as RAII). |
206 | // See the example in profiler.h. |
207 | class ScopedProfilingLabel { |
208 | ProfilingStack* profiling_stack_; |
209 | |
210 | public: |
211 | explicit ScopedProfilingLabel(const char* label) |
212 | : profiling_stack_(&ThreadLocalThreadInfo().stack) { |
213 | profiling_stack_->Push(label); |
214 | } |
215 | |
216 | ~ScopedProfilingLabel() { profiling_stack_->Pop(); } |
217 | |
218 | void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); } |
219 | }; |
220 | |
// To be called once on each thread to be profiled. Creates the
// thread's ThreadInfo if needed, and adds it to the global set of
// profiled threads under the profiler mutex.
inline void RegisterCurrentThreadForProfiling() {
  ScopedLock sl(GlobalMutexes::Profiler());
  ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
}
226 | |
227 | #else // not GEMMLOWP_PROFILING |
228 | // This code path is when profiling is disabled. |
229 | |
// This empty definition of ScopedProfilingLabel ensures that
// it has zero runtime overhead when profiling is disabled.
struct ScopedProfilingLabel {
  // Accepts (and ignores) the label so call sites compile unchanged.
  explicit ScopedProfilingLabel(const char*) {}
  void Update(const char*) {}
};
236 | |
// No-op when profiling is disabled.
inline void RegisterCurrentThreadForProfiling() {}
238 | |
239 | #endif |
240 | |
241 | } // end namespace gemmlowp |
242 | |
243 | #endif // GEMMLOWP_PROFILING_INSTRUMENTATION_H_ |
244 | |