1 | /* Copyright 2019 Google LLC. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef RUY_RUY_PATH_H_ |
17 | #define RUY_RUY_PATH_H_ |
18 | |
19 | #include <cstdint> |
20 | |
21 | #include "ruy/platform.h" |
22 | #include "ruy/size_util.h" |
23 | |
24 | namespace ruy { |
25 | |
26 | // A Path is an implementation path, typically corresponding to a SIMD |
// instruction set being targeted. For example, on the ARM architecture,
28 | // Path::kNeon means using NEON instructions, and Path::kNeonDotprod means |
29 | // also using the newer NEON dot-product instructions. |
30 | // |
31 | // Different Path enum values are defined on different CPU architectures, |
32 | // corresponding to different SIMD ISA extensions available there. |
33 | // |
34 | // Path::kStandardCpp is the one Path that is always available. |
35 | // |
36 | // Path enum values are bits and may be OR-ed to form "sets of Paths". |
37 | // Ruy entry points such as ruy::Mul either implicitly use such a set of Paths, |
38 | // or allow passing an explicit one as a template parameter. The meaning of such |
39 | // an OR-ed Path combination is "compile all of |
40 | // these paths; which path is used will be determined at runtime". This is why |
41 | // for most users, it is enough to call ruy::Mul(...), which will compile a |
42 | // reasonable selection of paths for the target CPU architecture's various |
43 | // SIMD ISA extensions, and let ruy determine at runtime which one to use. |
44 | // Internally, after the actual path has been resolved, ruy's internal functions |
45 | // templatized on a Path tend to require that to be a single bit. |
46 | // |
47 | // An element of ruy's internal design was to allow for code compiled for |
48 | // multiple such paths to coexist without violating the C++ One Definition Rule |
49 | // (ODR). This is achieved by having all ruy internal functions, whose |
50 | // definition depends on a choice of Path, be templatized on a Path, so that |
51 | // each path-specific specialization is a separate symbol. There is never |
52 | // a need to compile ruy code with different compilation flags to enable |
53 | // different SIMD extensions and dispatch at runtime between them, as this is |
54 | // taken care of internally by ruy in an ODR-correct way. |
enum class Path : std::uint8_t {
  // Null value: the empty set, i.e. no path at all.
  kNone = 0,
  // Plain, portable C++ implementation of the parts of ruy that are otherwise
  // architecture-specific.
  //
  // Meant for testing/development, and used as the fallback when the SIMD ISA
  // extensions needed by the other paths are not available at runtime.
  kStandardCpp = 1 << 0,
  // Internal, test-only flavors of kStandardCpp, used to exercise more corners
  // of ruy's internal logic.
  // Deliberately left out of ruy::kAllPaths and ruy::kNonArchPaths; only
  // dedicated ruy tests that name them explicitly ever use them.
  kInternalStandardCppVariant1 = 1 << 1,
  kInternalStandardCppVariant2 = 1 << 2,
  kInternalStandardCppVariant3 = 1 << 3,

// The architecture-specific enumerators below reuse the same bit positions
// across architectures (bit 4 is kNeon on ARM and kAvx on x86, and so on).
// NOTE(review): presumably RUY_PLATFORM_ARM and RUY_PLATFORM_X86 are never
// both enabled in one build - confirm against ruy/platform.h.
#if RUY_PLATFORM_ARM
  // Optimized path relying on a widely available subset of ARM NEON
  // instructions.
  kNeon = 1 << 4,
  // Optimized path that additionally uses the ARM NEON dot product
  // instructions found on newer ARM cores.
  kNeonDotprod = 1 << 5,
#endif  // RUY_PLATFORM_ARM

#if RUY_PLATFORM_X86
  // Optimized for AVX.
  // Compiled with -mavx.
  kAvx = 1 << 4,
  // Optimized for AVX2+FMA.
  // Compiled with -mavx2 -mfma.
  kAvx2Fma = 1 << 5,
  // Optimized for AVX-512.
  // Compiled with -mavx512f -mavx512vl -mavx512cd -mavx512bw -mavx512dq.
  kAvx512 = 1 << 6,
#endif  // RUY_PLATFORM_X86
};
91 | |
92 | inline constexpr Path operator|(Path p, Path q) { |
93 | return static_cast<Path>(static_cast<std::uint32_t>(p) | |
94 | static_cast<std::uint32_t>(q)); |
95 | } |
96 | |
97 | inline constexpr Path operator&(Path p, Path q) { |
98 | return static_cast<Path>(static_cast<std::uint32_t>(p) & |
99 | static_cast<std::uint32_t>(q)); |
100 | } |
101 | |
102 | inline constexpr Path operator^(Path p, Path q) { |
103 | return static_cast<Path>(static_cast<std::uint32_t>(p) ^ |
104 | static_cast<std::uint32_t>(q)); |
105 | } |
106 | |
107 | inline constexpr Path operator~(Path p) { |
108 | return static_cast<Path>(~static_cast<std::uint32_t>(p)); |
109 | } |
110 | |
111 | inline constexpr bool Disjoint(Path p, Path q) { |
112 | return (p & q) == Path::kNone; |
113 | } |
114 | |
115 | inline Path GetMostSignificantPath(Path path_mask) { |
116 | return static_cast<Path>(round_down_pot(static_cast<int>(path_mask))); |
117 | } |
118 | |
119 | // We define three disjoint sets of paths. |
120 | // |
121 | // kNonArchPaths is the set of paths that are defined regardless of |
122 | // the CPU architecture (excluding some internal test-only paths). |
123 | // These paths are slow, but portable. At the moment, |
124 | // that is only kStandardCpp. In the past, that used to also include a |
125 | // kReference path providing an even more basic implementation, but that has |
126 | // been split out into a separate library, see the ReferenceMul function. |
127 | constexpr Path kNonArchPaths = Path::kStandardCpp; |
128 | |
129 | // The other two are specific to each CPU architecture. Note that these sets |
130 | // do NOT include a fallback for when none of these architecture paths are |
131 | // supported at runtime by the CPU. For that, see the other constants defined |
132 | // further below. |
133 | // |
134 | // kDefaultArchPaths is the set of architecture-specific paths that |
135 | // we recommend for most users. It is part of kDefaultPaths defined |
136 | // below. |
137 | // |
138 | // kExtraArchPaths is the set of all other architecture-specific paths |
139 | // that for whatever reason we're not recommending to most users at the moment. |
140 | // Typically that would include work-in-progress paths, or paths targeting |
141 | // minority hardware that isn't the best compromise of code size to performance |
142 | // for most users. |
143 | |
144 | #if RUY_PLATFORM_NEON_64 |
145 | constexpr Path kDefaultArchPaths = Path::kNeon | Path::kNeonDotprod; |
146 | constexpr Path kExtraArchPaths = Path::kNone; |
147 | #elif RUY_PLATFORM_NEON_32 |
148 | constexpr Path kDefaultArchPaths = Path::kNeon; |
149 | constexpr Path kExtraArchPaths = Path::kNone; |
150 | #elif RUY_PLATFORM_X86 |
151 | constexpr Path kDefaultArchPaths = Path::kAvx | Path::kAvx2Fma | Path::kAvx512; |
152 | constexpr Path = Path::kNone; |
153 | #else |
154 | constexpr Path kDefaultArchPaths = Path::kNone; |
155 | constexpr Path kExtraArchPaths = Path::kNone; |
156 | #endif |
157 | |
158 | // kNonArchPathsIncludingInternalVariants is the set of all |
159 | // non-architecture-specific paths without exception. This includes some paths |
160 | // that are internal-only and test-only and not useful to any user. |
161 | static constexpr Path kNonArchPathsIncludingInternalVariants = |
162 | kNonArchPaths | Path::kInternalStandardCppVariant1 | |
163 | Path::kInternalStandardCppVariant2 | Path::kInternalStandardCppVariant3; |
164 | |
165 | // Enforce that kDefaultArchPaths, kExtraArchPaths and |
166 | // kNonArchPathsIncludingInternalVariants are mutually disjoint, |
167 | // and that kNonArchPaths is a subset of kNonArchPathsIncludingInternalVariants. |
168 | static_assert(Disjoint(kDefaultArchPaths, kExtraArchPaths), "" ); |
169 | static_assert(Disjoint(kDefaultArchPaths, |
170 | kNonArchPathsIncludingInternalVariants), |
171 | "" ); |
172 | static_assert(Disjoint(kExtraArchPaths, kNonArchPathsIncludingInternalVariants), |
173 | "" ); |
174 | static_assert(Disjoint(kNonArchPaths, ~kNonArchPathsIncludingInternalVariants), |
175 | "" ); |
176 | |
177 | // We now define two aggregate sets of paths for convenience, including |
178 | // both architecture-specific paths and some portable fallbacks. |
179 | // |
180 | // kDefaultPaths is the set of paths that we recommend most users to use. |
181 | // It is what ruy::Mul(...), the entry point not taking an explicit Path value, |
182 | // uses. |
183 | constexpr Path kDefaultPaths = Path::kStandardCpp | kDefaultArchPaths; |
184 | |
185 | // kAllPaths is the set of all paths that are available to compile, except |
186 | // some some internal test-only paths that no user would ever want to use. |
187 | // In addition to the Default paths, it also includes the extra |
188 | // architecture paths, as well as any other non-arch path besides kStandardCpp |
189 | // (there is none at the moment). |
190 | constexpr Path kAllPaths = kNonArchPaths | kDefaultArchPaths | kExtraArchPaths; |
191 | |
192 | // kAllPathsIncludingInternalVariants is the set of all paths without exception. |
193 | // This includes some paths that are internal-only and test-only and not useful |
194 | // to any user. |
195 | static constexpr Path kAllPathsIncludingInternalVariants = |
196 | kAllPaths | kNonArchPathsIncludingInternalVariants; |
197 | |
198 | static_assert(Disjoint(kDefaultPaths, ~kAllPaths), "" ); |
199 | static_assert(Disjoint(kAllPaths, ~kAllPathsIncludingInternalVariants), "" ); |
200 | |
201 | } // namespace ruy |
202 | |
203 | #endif // RUY_RUY_PATH_H_ |
204 | |