1/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef RUY_RUY_CPU_CACHE_PARAMS_H_
17#define RUY_RUY_CPU_CACHE_PARAMS_H_
18
19namespace ruy {
20
21// Holds some information about a CPU's data caches.
22//
23// Meaning of 'local': a 'local' cache means a cache that is used by only one
24// CPU core, not shared with other cores. It might still be used by multiple
25// 'processors' in case of SMT as in Intel HyperThreading. CPUs often have
26// multiple levels of local cache, e.g. L1 and L2. We typically return the
27// larger one, the assumption being that even the larger one has substantially
28// lower latency than any higher (non-local) cache, however as noted below (*)
29// the implementation may choose to ignore a cache level.
30//
31// Meaning of 'last level': this refers to some higher cache level, typically
32// shared among multiple CPU cores, so we considered using the terminology
33// 'shared' instead of 'last_level'. However that created some confusion of its
34// own, as the meaning of 'shared' varies between CPUs, with some CPUs not
35// having any level of cache shared among all cores. That is why we stick with
36// the 'last_level' terminology, however with the following caveats:
37// 1. As noted below (*) the implementation may choose to ignore a cache
38// level, which could cause the 'last level' cache according to ruy not to be
39// the actual last level.
40// 2. On some systems-on-chip there is a 'last level' cache outside of the
41// last level cache in the CPU complex. Ruy is not currently doing anything
42// specific regarding such caches.
43// 3. We haven't figured out how to amend our terminology to be meaningful
44// on NUMA architectures. NUMA hasn't been part of ruy's scope so far.
45//
46// (*) Note on ignoring certain cache levels:
47// The implementation may choose to ignore a cache if it's suspected not to
48// have compelling performance. This is true about all cache levels, but more
49// likely regarding the 'last level' cache. For example, a L4 cache may be
50// ignored if we believe that it's not the right latency/size compromise for us,
51// so on such a CPU, the L3 cache may be used as the 'last level' cache instead.
52//
53// (**) Note on CPUs with heterogeneous cores:
54// Some CPUs have multiple cores with different local caches. For example, some
55// ARM big.LITTLE CPUs have some CPU cores with L1=32k and L2=128k, and some
56// other CPU cores with L1=64k and L2=256k or even 512k. On such CPUs, the
57// fields in this struct refer to the minimum value over all cores. In other
58// words, we use conservative values that do not risk over-estimating local
59// cache sizes in case of a migration of our threads to smaller cores.
60//
61// Example:
62// On a Qualcomm S855 SoC, there are 8 CPU cores. Each core has L1 and L2 data
63// caches local to it:
64// - 4 cores have L1=32k, L2=128k.
65// - 3 cores have L1=64k, L2=256k.
66// - 1 core has L1=64k, L2=512k.
67// All 8 cores share a L3 cache of size 2M, and there is beyond that a SoC-level
68// cache of size 3M.
69// On such a system, we should have:
// - local_cache_size=128k, the smallest L2 size.
71// - last_level_cache_size=2M, the L3 cache size, ignoring the SoC-level cache.
struct CpuCacheParams final {
  // Size, in bytes, of the local cache (see "Meaning of 'local'"). When cores
  // have different local caches, this is the smallest size found across all
  // cores (see (**)). Defaults to 0.
  int local_cache_size{0};
  // Size, in bytes, of the last level cache (see "Meaning of 'last level'").
  // Like local_cache_size, this is the minimum over all cores (see (**)).
  // Defaults to 0.
  int last_level_cache_size{0};
};
80
81} // namespace ruy
82
83#endif // RUY_RUY_CPU_CACHE_PARAMS_H_
84