1 | #include "ruy/cpuinfo.h" |
2 | |
3 | #include <algorithm> |
4 | #include <cstdint> |
5 | #include <limits> |
6 | |
7 | #include "ruy/check_macros.h" |
8 | #include "ruy/cpu_cache_params.h" |
9 | #include "ruy/platform.h" |
10 | |
11 | #ifdef RUY_HAVE_CPUINFO |
12 | #include <cpuinfo.h> |
13 | #endif |
14 | |
15 | namespace ruy { |
16 | |
17 | namespace { |
18 | void MakeDummyCacheParams(CpuCacheParams* result) { |
19 | // Reasonable dummy values |
20 | result->local_cache_size = 32 * 1024; |
21 | result->last_level_cache_size = 512 * 1024; |
22 | } |
23 | } // end namespace |
24 | |
25 | #ifdef RUY_HAVE_CPUINFO |
26 | |
27 | CpuInfo::~CpuInfo() { |
28 | if (init_status_ == InitStatus::kInitialized) { |
29 | cpuinfo_deinitialize(); |
30 | } |
31 | } |
32 | |
33 | bool CpuInfo::EnsureInitialized() { |
34 | if (init_status_ == InitStatus::kNotYetAttempted) { |
35 | init_status_ = Initialize(); |
36 | RUY_DCHECK_NE(init_status_, InitStatus::kNotYetAttempted); |
37 | } |
38 | return init_status_ == InitStatus::kInitialized; |
39 | } |
40 | |
namespace {
// Queries, via the cpuinfo library, the cache parameters that ruy cares
// about, storing them in *cache_params:
//  - local_cache_size: size of the largest cache level that is local to a
//    single core (not shared with other cores).
//  - last_level_cache_size: size of the last-level (largest) cache visited.
// Both values are taken as the minimum over all logical processors, so they
// are safe regardless of which core the code happens to run on (relevant on
// heterogeneous, e.g. big.LITTLE, systems).
void QueryCacheParams(CpuCacheParams* cache_params) {
  const int processors_count = cpuinfo_get_processors_count();
  RUY_DCHECK_GT(processors_count, 0);
  // Start at "infinity" so the std::min accumulation below works.
  int overall_local_cache_size = std::numeric_limits<int>::max();
  int overall_last_level_cache_size = std::numeric_limits<int>::max();
  for (int i = 0; i < processors_count; i++) {
    int local_cache_size = 0;
    int last_level_cache_size = 0;
    const cpuinfo_processor* processor = cpuinfo_get_processor(i);
    // Loop over cache levels. Ignoring L4 for now: it seems that in CPUs that
    // have L4, we would still prefer to stay in lower-latency L3.
    for (const cpuinfo_cache* cache :
         {processor->cache.l1d, processor->cache.l2, processor->cache.l3}) {
      if (!cache) {
        continue;  // continue, not break, it is possible to have L1+L3 but no
                   // L2.
      }
      // A cache level is "local" to a core if all processors sharing it
      // belong to the same core. cpuinfo describes the sharers as the range
      // [processor_start, processor_start + processor_count), so comparing
      // the cores of the first and last sharer suffices.
      const bool is_local =
          cpuinfo_get_processor(cache->processor_start)->core ==
          cpuinfo_get_processor(cache->processor_start +
                                cache->processor_count - 1)
              ->core;
      if (is_local) {
        // Levels are visited in increasing order, so this ends up holding
        // the largest local level (typically L2 when L2 is per-core).
        local_cache_size = cache->size;
      }
      // Likewise, this ends up holding the size of the last level visited.
      last_level_cache_size = cache->size;
    }
    // If no local cache was found, use the last-level cache.
    if (!local_cache_size) {
      local_cache_size = last_level_cache_size;
    }
    RUY_DCHECK_GT(local_cache_size, 0);
    RUY_DCHECK_GT(last_level_cache_size, 0);
    RUY_DCHECK_GE(last_level_cache_size, local_cache_size);
    // Minimum across all processors, so the result is valid on any core.
    overall_local_cache_size =
        std::min(overall_local_cache_size, local_cache_size);
    overall_last_level_cache_size =
        std::min(overall_last_level_cache_size, last_level_cache_size);
  }
  cache_params->local_cache_size = overall_local_cache_size;
  cache_params->last_level_cache_size = overall_last_level_cache_size;
}
}  // end namespace
85 | |
86 | CpuInfo::InitStatus CpuInfo::Initialize() { |
87 | RUY_DCHECK_EQ(init_status_, InitStatus::kNotYetAttempted); |
88 | if (!cpuinfo_initialize()) { |
89 | MakeDummyCacheParams(&cache_params_); |
90 | return InitStatus::kFailed; |
91 | } |
92 | QueryCacheParams(&cache_params_); |
93 | return InitStatus::kInitialized; |
94 | } |
95 | |
96 | bool CpuInfo::NeonDotprod() { |
97 | return EnsureInitialized() && cpuinfo_has_arm_neon_dot(); |
98 | } |
99 | |
100 | bool CpuInfo::Sse42() { |
101 | return EnsureInitialized() && cpuinfo_has_x86_sse4_2(); |
102 | } |
103 | |
104 | bool CpuInfo::Avx2Fma() { |
105 | return EnsureInitialized() && cpuinfo_has_x86_avx2() && |
106 | cpuinfo_has_x86_fma3(); |
107 | } |
108 | |
109 | bool CpuInfo::Avx() { return EnsureInitialized() && cpuinfo_has_x86_avx(); } |
110 | |
111 | bool CpuInfo::Avx512() { |
112 | return EnsureInitialized() && cpuinfo_has_x86_avx512f() && |
113 | cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512cd() && |
114 | cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512vl(); |
115 | } |
116 | |
117 | bool CpuInfo::AvxVnni() { |
118 | return EnsureInitialized() && cpuinfo_has_x86_avx512vnni(); |
119 | } |
120 | |
121 | bool CpuInfo::CurrentCpuIsA55ish() { |
122 | if (!EnsureInitialized()) { |
123 | return false; |
124 | } |
125 | |
126 | switch (cpuinfo_get_uarch(cpuinfo_get_current_uarch_index())->uarch) { |
127 | case cpuinfo_uarch_cortex_a53: |
128 | case cpuinfo_uarch_cortex_a55r0: |
129 | case cpuinfo_uarch_cortex_a55: |
130 | return true; |
131 | default: |
132 | return false; |
133 | } |
134 | } |
135 | |
136 | bool CpuInfo::CurrentCpuIsX1() { |
137 | if (!EnsureInitialized()) { |
138 | return false; |
139 | } |
140 | if (cpuinfo_get_uarch(cpuinfo_get_current_uarch_index())->uarch == |
141 | cpuinfo_uarch_cortex_x1) { |
142 | return true; |
143 | } |
144 | return false; |
145 | } |
146 | |
147 | #else // not defined RUY_HAVE_CPUINFO |
148 | |
149 | CpuInfo::~CpuInfo() {} |
150 | bool CpuInfo::EnsureInitialized() { |
151 | if (init_status_ == InitStatus::kNotYetAttempted) { |
152 | MakeDummyCacheParams(&cache_params_); |
153 | init_status_ = InitStatus::kInitialized; |
154 | } |
155 | RUY_DCHECK_EQ(init_status_, InitStatus::kInitialized); |
156 | return true; |
157 | } |
// Fallback (cpuinfo library not available): conservatively report that no
// optional CPU feature is available and that the current core is not one of
// the specially-handled microarchitectures.
bool CpuInfo::NeonDotprod() { return false; }
bool CpuInfo::Sse42() { return false; }
bool CpuInfo::Avx() { return false; }
bool CpuInfo::Avx2Fma() { return false; }
bool CpuInfo::Avx512() { return false; }
bool CpuInfo::AvxVnni() { return false; }
bool CpuInfo::CurrentCpuIsA55ish() { return false; }
bool CpuInfo::CurrentCpuIsX1() { return false; }
166 | |
167 | #endif |
168 | |
// Returns the CPU cache size parameters, initializing them on first use.
// Always returns usable values: on initialization failure, EnsureInitialized
// leaves dummy values in cache_params_.
const CpuCacheParams& CpuInfo::CacheParams() {
  EnsureInitialized();
  // On failure, EnsureInitialized leaves dummy values in cache_params_.
  return cache_params_;
}
174 | |
175 | } // namespace ruy |
176 | |