/* Copyright 2019 Google LLC. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "ruy/ctx.h"

#include <cstdlib>
#include <functional>
#include <string>

#include "ruy/check_macros.h"
#include "ruy/cpuinfo.h"
#include "ruy/ctx_impl.h"
#include "ruy/have_built_path_for.h"
#include "ruy/path.h"
#include "ruy/performance_advisory.h"
#include "ruy/platform.h"
#include "ruy/prepacked_cache.h"
#include "ruy/trace.h"

namespace ruy {

const CtxImpl& Ctx::impl() const { return static_cast<const CtxImpl&>(*this); }
CtxImpl* Ctx::mutable_impl() { return static_cast<CtxImpl*>(this); }

Path Ctx::last_used_path() const { return impl().last_used_path_; }
Tuning Ctx::explicit_tuning() const { return impl().explicit_tuning_; }
void Ctx::set_explicit_tuning(Tuning value) {
  mutable_impl()->explicit_tuning_ = value;
}
const ThreadPool& Ctx::thread_pool() const { return impl().thread_pool_; }
ThreadPool* Ctx::mutable_thread_pool() { return &mutable_impl()->thread_pool_; }
int Ctx::max_num_threads() const { return impl().max_num_threads_; }
void Ctx::set_max_num_threads(int value) {
  mutable_impl()->max_num_threads_ = value;
}
void Ctx::clear_performance_advisories() {
  mutable_impl()->performance_advisory_ = PerformanceAdvisory::kNone;
}
void Ctx::set_performance_advisory(PerformanceAdvisory advisory) {
  mutable_impl()->performance_advisory_ =
      mutable_impl()->performance_advisory_ | advisory;
}
bool Ctx::performance_advisory(PerformanceAdvisory advisory) const {
  return (impl().performance_advisory_ & advisory) !=
         PerformanceAdvisory::kNone;
}

void Ctx::SetRuntimeEnabledPaths(Path paths) {
  if (paths == Path::kNone) {
    // Revert to default behavior using runtime detection.
    mutable_impl()->runtime_enabled_paths_ = Path::kNone;
  } else {
    // Explicitly set enabled paths. Ensure that non-arch paths are always
    // enabled (needed for fallbacks).
    mutable_impl()->runtime_enabled_paths_ = paths | kNonArchPaths;
  }
}
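
// Illustrative usage (directly on the internal Ctx; applications normally go
// through the public ruy::Context wrapper). On an ARM build:
//   ctx->SetRuntimeEnabledPaths(Path::kNeon);
//   // The effective set is Path::kNeon | kNonArchPaths, so non-arch
//   // fallbacks stay available even though only kNeon was requested.
//   ctx->SetRuntimeEnabledPaths(Path::kNone);  // revert to runtime detection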

CpuInfo* Ctx::mutable_cpuinfo() { return &mutable_impl()->cpuinfo_; }

namespace {

// Returns the value of the environment variable `name`, parsed as a
// hexadecimal integer, or 0 if the variable is unset.
int GetHexIntEnvVarOrZero(const char* name) {
  const char* val = std::getenv(name);
  if (!val) {
    return 0;
  }
  return std::stoi(val, nullptr, 16);
}
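
// For example, with RUY_PATHS=30 set in the environment,
// GetHexIntEnvVarOrZero("RUY_PATHS") returns 0x30 == 48, since the value is
// parsed as hexadecimal. Note that std::stoi throws std::invalid_argument if
// the value is not a valid hex integer.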

// For each Path bit set in `paths_to_detect`, performs runtime detection and
// sets the corresponding bit in the return value if and only if it is
// supported. Path bits that are not set in `paths_to_detect` are left unset
// in the return value, except for the non-arch paths, which are always set.
Path DetectRuntimeSupportedPaths(Path paths_to_detect, CpuInfo* cpuinfo) {
  // Paths in kNonArchPathsIncludingInternalVariants are always implicitly
  // supported. Further logic below may add more bits to `result`.
  Path result = kNonArchPathsIncludingInternalVariants;

  // Conditionally sets the `path` bit in `result`, if reported as supported
  // by the `is_supported` predicate.
  auto maybe_add = [&](Path path, std::function<bool(void)> is_supported) {
    if ((paths_to_detect & path) != Path::kNone) {
      if (is_supported()) {
        result = result | path;
      }
    }
  };
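
  // Note that the `is_supported` predicate is only invoked for paths that are
  // requested in `paths_to_detect`, so no detection work (such as cpuinfo
  // queries) happens for paths the caller did not ask about.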

#if RUY_PLATFORM_ARM
  // NEON is unconditionally available on ARM64.
  // On ARM32 it's technically possible for it to be unavailable, but we've
  // always chosen to just crash on such devices. We could reevaluate that;
  // however, for non-NEON devices to be actually supported, we would also
  // need to address compiler-generated NEON code. That would mean removing
  // -mfpu=neon from ruy_copts, using that flag only in select NEON
  // translation units, and implementing have_built_path_for_neon, similar to
  // the x86 SIMD paths.
  maybe_add(Path::kNeon, []() { return true; });

  // NEON dotprod requires runtime detection; however, unlike the x86 SIMD
  // paths, it does not require have_built_path_for because we unconditionally
  // build it at the moment. That is largely because we have had to
  // machine-encode dotprod instructions, so we don't actually rely on
  // toolchain support for them.
  maybe_add(Path::kNeonDotprod, [=]() { return cpuinfo->NeonDotprod(); });
#elif RUY_PLATFORM_X86
  // x86 SIMD paths currently require both runtime detection, and detection of
  // whether we're building the path at all.
  maybe_add(Path::kAvx,
            [=]() { return HaveBuiltPathForAvx() && cpuinfo->Avx(); });
  maybe_add(Path::kAvx2Fma,
            [=]() { return HaveBuiltPathForAvx2Fma() && cpuinfo->Avx2Fma(); });
  maybe_add(Path::kAvx512,
            [=]() { return HaveBuiltPathForAvx512() && cpuinfo->Avx512(); });
#else
  (void)maybe_add;
  (void)cpuinfo;
#endif

  // Sanity checks:
  // All non-arch paths must be present in the result.
  RUY_DCHECK_EQ(kNonArchPaths & ~result, Path::kNone);
  // The result must not contain any bit outside of the implicitly supported
  // non-arch paths and the requested `paths_to_detect` bits.
  RUY_DCHECK_EQ(
      result & ~(kNonArchPathsIncludingInternalVariants | paths_to_detect),
      Path::kNone);
  return result;
}

}  // namespace

Path Ctx::GetRuntimeEnabledPaths() {
  RUY_TRACE_SCOPE;
  // Just a shorthand alias. Using a pointer to make it clear we're mutating
  // this value in-place.
  Path* paths = &mutable_impl()->runtime_enabled_paths_;

  // The value Path::kNone indicates the initial state before detection has
  // been performed.
  if (*paths != Path::kNone) {
    RUY_TRACE_INFO(GET_RUNTIME_ENABLED_PATHS_USING_SET_VALUE);
    return *paths;
  }
  // The user may have set paths explicitly in the RUY_PATHS env var.
  Path paths_bitfield = static_cast<Path>(GetHexIntEnvVarOrZero("RUY_PATHS"));
  if (paths_bitfield != Path::kNone) {
    *paths = paths_bitfield;
    RUY_TRACE_INFO(GET_RUNTIME_ENABLED_PATHS_USING_ENV_VAR);
    return *paths;
  }
  // Finally, use runtime detection.
  *paths = DetectRuntimeSupportedPaths(kAllPaths, mutable_cpuinfo());
  RUY_TRACE_INFO(GET_RUNTIME_ENABLED_PATHS_USING_DETECTION);
  return *paths;
}

Path Ctx::SelectPath(Path compiled_paths) {
  return mutable_impl()->last_used_path_ =
             GetMostSignificantPath(compiled_paths & GetRuntimeEnabledPaths());
}
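
// Example of SelectPath's behavior (path combinations illustrative; the
// actual bit values are defined in ruy/path.h): on an ARM build compiled with
// compiled_paths == kStandardCpp | kNeon | kNeonDotprod, if runtime detection
// only enabled kStandardCpp | kNeon (say, no dotprod support on this CPU),
// the intersection is kStandardCpp | kNeon, and GetMostSignificantPath
// selects kNeon, the highest-order bit set.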

// Grows the thread-specific resources vector to cover `thread_count` threads.
// Existing entries are preserved; the vector never shrinks.
void Ctx::EnsureThreadSpecificResources(int thread_count) {
  auto& resources = mutable_impl()->thread_specific_resources_;
  while (thread_count > static_cast<int>(resources.size())) {
    resources.emplace_back(new ThreadSpecificResource);
  }
  RUY_DCHECK_LE(thread_count, static_cast<int>(resources.size()));
}

TuningResolver* Ctx::GetThreadSpecificTuningResolver(int thread_index) const {
  const auto& resources = impl().thread_specific_resources_;
  RUY_DCHECK_LT(thread_index, static_cast<int>(resources.size()));
  return &resources[thread_index]->tuning_resolver;
}

Allocator* Ctx::GetThreadSpecificAllocator(int thread_index) const {
  const auto& resources = impl().thread_specific_resources_;
  RUY_DCHECK_LT(thread_index, static_cast<int>(resources.size()));
  return &resources[thread_index]->allocator;
}

// Lazily creates the main Allocator on first use; subsequent calls return the
// same instance.
Allocator* Ctx::GetMainAllocator() {
  if (!impl().main_allocator_) {
    mutable_impl()->main_allocator_.reset(new Allocator);
  }
  return impl().main_allocator_.get();
}

// Lazily creates the PrepackedCache on first use; subsequent calls return the
// same instance.
PrepackedCache* Ctx::GetPrepackedCache() {
  if (!impl().prepacked_cache_) {
    mutable_impl()->prepacked_cache_.reset(new PrepackedCache);
  }
  return impl().prepacked_cache_.get();
}

Tuning Ctx::GetMainThreadTuning() {
  EnsureThreadSpecificResources(1);
  TuningResolver* tuning_resolver = GetThreadSpecificTuningResolver(0);
  tuning_resolver->SetTuning(explicit_tuning());
  return tuning_resolver->Resolve(mutable_cpuinfo());
}

void Ctx::ClearPrepackedCache() { mutable_impl()->prepacked_cache_ = nullptr; }

}  // namespace ruy