/* Copyright 2019 Google LLC. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "ruy/ctx.h"

#include <cstdlib>
#include <functional>
#include <string>

#include "ruy/check_macros.h"
#include "ruy/cpuinfo.h"
#include "ruy/ctx_impl.h"
#include "ruy/have_built_path_for.h"
#include "ruy/path.h"
#include "ruy/performance_advisory.h"
#include "ruy/platform.h"
#include "ruy/prepacked_cache.h"
#include "ruy/trace.h"

namespace ruy {

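// Ctx stores its state in the derived class CtxImpl (see ctx_impl.h). These
// accessors downcast to reach that state; the static_casts are presumed safe
// because CtxImpl is the concrete type that ruy instantiates.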
const CtxImpl& Ctx::impl() const { return static_cast<const CtxImpl&>(*this); }
CtxImpl* Ctx::mutable_impl() { return static_cast<CtxImpl*>(this); }

Path Ctx::last_used_path() const { return impl().last_used_path_; }
Tuning Ctx::explicit_tuning() const { return impl().explicit_tuning_; }
void Ctx::set_explicit_tuning(Tuning value) {
  mutable_impl()->explicit_tuning_ = value;
}
const ThreadPool& Ctx::thread_pool() const { return impl().thread_pool_; }
ThreadPool* Ctx::mutable_thread_pool() { return &mutable_impl()->thread_pool_; }
int Ctx::max_num_threads() const { return impl().max_num_threads_; }
void Ctx::set_max_num_threads(int value) {
  mutable_impl()->max_num_threads_ = value;
}
void Ctx::clear_performance_advisories() {
  mutable_impl()->performance_advisory_ = PerformanceAdvisory::kNone;
}
void Ctx::set_performance_advisory(PerformanceAdvisory advisory) {
  mutable_impl()->performance_advisory_ =
      mutable_impl()->performance_advisory_ | advisory;
}
bool Ctx::performance_advisory(PerformanceAdvisory advisory) const {
  return (impl().performance_advisory_ & advisory) !=
         PerformanceAdvisory::kNone;
}

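// Overrides runtime path detection. Passing Path::kNone reverts to the
// default runtime-detection behavior; any other value is taken as an
// explicit bitfield of paths to enable, to which the non-arch paths are
// always added so that fallbacks remain available.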
void Ctx::SetRuntimeEnabledPaths(Path paths) {
  if (paths == Path::kNone) {
    // Revert to the default behavior of using runtime detection.
    mutable_impl()->runtime_enabled_paths_ = Path::kNone;
  } else {
    // Explicitly set the enabled paths. Ensure that the non-arch paths are
    // always enabled, as they are needed for fallbacks.
    mutable_impl()->runtime_enabled_paths_ = paths | kNonArchPaths;
  }
}

CpuInfo* Ctx::mutable_cpuinfo() { return &mutable_impl()->cpuinfo_; }

namespace {

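// Returns the value of the environment variable `name` parsed as a
// hexadecimal integer, or 0 if the variable is unset. Note that std::stoi
// throws std::invalid_argument on malformed input rather than returning 0.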
int GetHexIntEnvVarOrZero(const char* name) {
  const char* val = getenv(name);
  if (!val) {
    return 0;
  }
  return std::stoi(val, nullptr, 16);
}

// For each Path bit set in `paths_to_detect`, performs runtime detection and
// sets the corresponding bit in the return value if and only if it is
// supported. Path bits that are not set in `paths_to_detect` are left unset
// in the return value, except for the always-enabled non-arch paths.
Path DetectRuntimeSupportedPaths(Path paths_to_detect, CpuInfo* cpuinfo) {
  // Paths in kNonArchPathsIncludingInternalVariants are always implicitly
  // supported. Further logic below may add more bits to `result`.
  Path result = kNonArchPathsIncludingInternalVariants;

  // Conditionally sets the `path` bit in `result` if the `is_supported`
  // predicate reports it as supported.
  auto maybe_add = [&](Path path, std::function<bool(void)> is_supported) {
    if ((paths_to_detect & path) != Path::kNone) {
      if (is_supported()) {
        result = result | path;
      }
    }
  };

#if RUY_PLATFORM_ARM
  // NEON is unconditionally available on ARM64.
  // On ARM32 it's technically possible for NEON to be unavailable, but we've
  // always chosen to just crash on such devices. We could reevaluate that;
  // however, to actually support non-NEON devices, we would also have to
  // address compiler-generated NEON code. That would mean removing
  // -mfpu=neon from ruy_copts, using that flag only in select NEON
  // translation units, and implementing have_built_path_for_neon, similar to
  // the x86 SIMD paths.
  maybe_add(Path::kNeon, []() { return true; });

  // The NEON dotprod path requires runtime detection; however, unlike the
  // x86 SIMD paths, it does not also require a have_built_path_for check,
  // because we unconditionally build it at the moment. That is largely
  // because we have had to machine-encode the dotprod instructions, so we
  // don't actually rely on toolchain support for them.
  maybe_add(Path::kNeonDotprod, [=]() { return cpuinfo->NeonDotprod(); });
#elif RUY_PLATFORM_X86
  // The x86 SIMD paths currently require both runtime detection and a check,
  // via HaveBuiltPathFor*, that the path was built at all.
  maybe_add(Path::kAvx,
            [=]() { return HaveBuiltPathForAvx() && cpuinfo->Avx(); });
  maybe_add(Path::kAvx2Fma,
            [=]() { return HaveBuiltPathForAvx2Fma() && cpuinfo->Avx2Fma(); });
  maybe_add(Path::kAvx512,
            [=]() { return HaveBuiltPathForAvx512() && cpuinfo->Avx512(); });
#else
  (void)maybe_add;
  (void)cpuinfo;
#endif

  // Sanity checks.
  RUY_DCHECK_EQ(kNonArchPaths & ~result, Path::kNone);
  RUY_DCHECK_EQ(
      result & ~(kNonArchPathsIncludingInternalVariants | paths_to_detect),
      Path::kNone);
  return result;
}

}  // namespace

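// Resolves the set of runtime-enabled paths, in decreasing order of
// precedence: a value already set (by SetRuntimeEnabledPaths or by a prior
// call to this function), then the RUY_PATHS environment variable (parsed as
// a hexadecimal bitfield of Path values), then runtime CPU detection. The
// result is cached in runtime_enabled_paths_.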
Path Ctx::GetRuntimeEnabledPaths() {
  RUY_TRACE_SCOPE;
  // Just a shorthand alias. Using a pointer to make it clear we're mutating
  // this value in-place.
  Path* paths = &mutable_impl()->runtime_enabled_paths_;

  // The value Path::kNone indicates the initial state before detection has
  // been performed.
  if (*paths != Path::kNone) {
    RUY_TRACE_INFO(GET_RUNTIME_ENABLED_PATHS_USING_SET_VALUE);
    return *paths;
  }
  // The user may have set paths explicitly via the RUY_PATHS environment
  // variable.
  Path paths_bitfield = static_cast<Path>(GetHexIntEnvVarOrZero("RUY_PATHS"));
  if (paths_bitfield != Path::kNone) {
    *paths = paths_bitfield;
    RUY_TRACE_INFO(GET_RUNTIME_ENABLED_PATHS_USING_ENV_VAR);
    return *paths;
  }
  // Finally, fall back to runtime detection.
  *paths = DetectRuntimeSupportedPaths(kAllPaths, mutable_cpuinfo());
  RUY_TRACE_INFO(GET_RUNTIME_ENABLED_PATHS_USING_DETECTION);
  return *paths;
}

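// Selects the path to use for the current operation: the most significant
// bit of the intersection of the caller's compiled-in paths and the
// runtime-enabled paths. The selection is recorded in last_used_path_ as a
// side effect.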
Path Ctx::SelectPath(Path compiled_paths) {
  return mutable_impl()->last_used_path_ =
             GetMostSignificantPath(compiled_paths & GetRuntimeEnabledPaths());
}

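// Grows thread_specific_resources_ to hold at least `thread_count` entries.
// Entries are heap-allocated and never destroyed here, so pointers handed
// out by the per-thread accessors below remain valid as the vector grows.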
void Ctx::EnsureThreadSpecificResources(int thread_count) {
  auto& resources = mutable_impl()->thread_specific_resources_;
  while (thread_count > static_cast<int>(resources.size())) {
    resources.emplace_back(new ThreadSpecificResource);
  }
  RUY_DCHECK_LE(thread_count, static_cast<int>(resources.size()));
}

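// Per-thread accessors. Callers must first have sized
// thread_specific_resources_ via EnsureThreadSpecificResources with a
// thread_count greater than thread_index.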
TuningResolver* Ctx::GetThreadSpecificTuningResolver(int thread_index) const {
  const auto& resources = impl().thread_specific_resources_;
  RUY_DCHECK_LT(thread_index, static_cast<int>(resources.size()));
  return &resources[thread_index]->tuning_resolver;
}

Allocator* Ctx::GetThreadSpecificAllocator(int thread_index) const {
  const auto& resources = impl().thread_specific_resources_;
  RUY_DCHECK_LT(thread_index, static_cast<int>(resources.size()));
  return &resources[thread_index]->allocator;
}

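// Lazily creates the main allocator on first use.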
Allocator* Ctx::GetMainAllocator() {
  if (!impl().main_allocator_) {
    mutable_impl()->main_allocator_.reset(new Allocator);
  }
  return impl().main_allocator_.get();
}

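// Lazily creates the cache of prepacked matrices on first use. See
// ClearPrepackedCache below for releasing it.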
PrepackedCache* Ctx::GetPrepackedCache() {
  if (!impl().prepacked_cache_) {
    mutable_impl()->prepacked_cache_.reset(new PrepackedCache);
  }
  return impl().prepacked_cache_.get();
}

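// Resolves the tuning to be used on the main thread (thread index 0),
// honoring any explicit_tuning() override.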
Tuning Ctx::GetMainThreadTuning() {
  EnsureThreadSpecificResources(1);
  TuningResolver* tuning_resolver = GetThreadSpecificTuningResolver(0);
  tuning_resolver->SetTuning(explicit_tuning());
  return tuning_resolver->Resolve(mutable_cpuinfo());
}

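// Destroys the prepacked cache, releasing the memory of any cached prepacked
// matrices. A subsequent GetPrepackedCache call will recreate it.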
void Ctx::ClearPrepackedCache() { mutable_impl()->prepacked_cache_ = nullptr; }

}  // namespace ruy