1#include "ruy/cpuinfo.h"
2
3#include <algorithm>
4#include <cstdint>
5#include <limits>
6
7#include "ruy/check_macros.h"
8#include "ruy/cpu_cache_params.h"
9#include "ruy/platform.h"
10
11#ifdef RUY_HAVE_CPUINFO
12#include <cpuinfo.h>
13#endif
14
15namespace ruy {
16
17namespace {
18void MakeDummyCacheParams(CpuCacheParams* result) {
19 // Reasonable dummy values
20 result->local_cache_size = 32 * 1024;
21 result->last_level_cache_size = 512 * 1024;
22}
23} // end namespace
24
25#ifdef RUY_HAVE_CPUINFO
26
27CpuInfo::~CpuInfo() {
28 if (init_status_ == InitStatus::kInitialized) {
29 cpuinfo_deinitialize();
30 }
31}
32
33bool CpuInfo::EnsureInitialized() {
34 if (init_status_ == InitStatus::kNotYetAttempted) {
35 init_status_ = Initialize();
36 RUY_DCHECK_NE(init_status_, InitStatus::kNotYetAttempted);
37 }
38 return init_status_ == InitStatus::kInitialized;
39}
40
41namespace {
42void QueryCacheParams(CpuCacheParams* cache_params) {
43 const int processors_count = cpuinfo_get_processors_count();
44 RUY_DCHECK_GT(processors_count, 0);
45 int overall_local_cache_size = std::numeric_limits<int>::max();
46 int overall_last_level_cache_size = std::numeric_limits<int>::max();
47 for (int i = 0; i < processors_count; i++) {
48 int local_cache_size = 0;
49 int last_level_cache_size = 0;
50 const cpuinfo_processor* processor = cpuinfo_get_processor(i);
51 // Loop over cache levels. Ignoring L4 for now: it seems that in CPUs that
52 // have L4, we would still prefer to stay in lower-latency L3.
53 for (const cpuinfo_cache* cache :
54 {processor->cache.l1d, processor->cache.l2, processor->cache.l3}) {
55 if (!cache) {
56 continue; // continue, not break, it is possible to have L1+L3 but no
57 // L2.
58 }
59 const bool is_local =
60 cpuinfo_get_processor(cache->processor_start)->core ==
61 cpuinfo_get_processor(cache->processor_start +
62 cache->processor_count - 1)
63 ->core;
64 if (is_local) {
65 local_cache_size = cache->size;
66 }
67 last_level_cache_size = cache->size;
68 }
69 // If no local cache was found, use the last-level cache.
70 if (!local_cache_size) {
71 local_cache_size = last_level_cache_size;
72 }
73 RUY_DCHECK_GT(local_cache_size, 0);
74 RUY_DCHECK_GT(last_level_cache_size, 0);
75 RUY_DCHECK_GE(last_level_cache_size, local_cache_size);
76 overall_local_cache_size =
77 std::min(overall_local_cache_size, local_cache_size);
78 overall_last_level_cache_size =
79 std::min(overall_last_level_cache_size, last_level_cache_size);
80 }
81 cache_params->local_cache_size = overall_local_cache_size;
82 cache_params->last_level_cache_size = overall_last_level_cache_size;
83}
84} // end namespace
85
86CpuInfo::InitStatus CpuInfo::Initialize() {
87 RUY_DCHECK_EQ(init_status_, InitStatus::kNotYetAttempted);
88 if (!cpuinfo_initialize()) {
89 MakeDummyCacheParams(&cache_params_);
90 return InitStatus::kFailed;
91 }
92 QueryCacheParams(&cache_params_);
93 return InitStatus::kInitialized;
94}
95
96bool CpuInfo::NeonDotprod() {
97 return EnsureInitialized() && cpuinfo_has_arm_neon_dot();
98}
99
100bool CpuInfo::Sse42() {
101 return EnsureInitialized() && cpuinfo_has_x86_sse4_2();
102}
103
104bool CpuInfo::Avx2Fma() {
105 return EnsureInitialized() && cpuinfo_has_x86_avx2() &&
106 cpuinfo_has_x86_fma3();
107}
108
109bool CpuInfo::Avx() { return EnsureInitialized() && cpuinfo_has_x86_avx(); }
110
111bool CpuInfo::Avx512() {
112 return EnsureInitialized() && cpuinfo_has_x86_avx512f() &&
113 cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512cd() &&
114 cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512vl();
115}
116
117bool CpuInfo::AvxVnni() {
118 return EnsureInitialized() && cpuinfo_has_x86_avx512vnni();
119}
120
121bool CpuInfo::CurrentCpuIsA55ish() {
122 if (!EnsureInitialized()) {
123 return false;
124 }
125
126 switch (cpuinfo_get_uarch(cpuinfo_get_current_uarch_index())->uarch) {
127 case cpuinfo_uarch_cortex_a53:
128 case cpuinfo_uarch_cortex_a55r0:
129 case cpuinfo_uarch_cortex_a55:
130 return true;
131 default:
132 return false;
133 }
134}
135
136bool CpuInfo::CurrentCpuIsX1() {
137 if (!EnsureInitialized()) {
138 return false;
139 }
140 if (cpuinfo_get_uarch(cpuinfo_get_current_uarch_index())->uarch ==
141 cpuinfo_uarch_cortex_x1) {
142 return true;
143 }
144 return false;
145}
146
147#else // not defined RUY_HAVE_CPUINFO
148
149CpuInfo::~CpuInfo() {}
150bool CpuInfo::EnsureInitialized() {
151 if (init_status_ == InitStatus::kNotYetAttempted) {
152 MakeDummyCacheParams(&cache_params_);
153 init_status_ = InitStatus::kInitialized;
154 }
155 RUY_DCHECK_EQ(init_status_, InitStatus::kInitialized);
156 return true;
157}
158bool CpuInfo::NeonDotprod() { return false; }
159bool CpuInfo::Sse42() { return false; }
160bool CpuInfo::Avx() { return false; }
161bool CpuInfo::Avx2Fma() { return false; }
162bool CpuInfo::Avx512() { return false; }
163bool CpuInfo::AvxVnni() { return false; }
164bool CpuInfo::CurrentCpuIsA55ish() { return false; }
165bool CpuInfo::CurrentCpuIsX1() { return false; }
166
167#endif
168
169const CpuCacheParams& CpuInfo::CacheParams() {
170 EnsureInitialized();
171 // On failure, EnsureInitialized leaves dummy values in cache_params_.
172 return cache_params_;
173}
174
175} // namespace ruy
176