1/* Copyright 2019 Google LLC. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef RUY_RUY_PATH_H_
17#define RUY_RUY_PATH_H_
18
19#include <cstdint>
20
21#include "ruy/platform.h"
22#include "ruy/size_util.h"
23
24namespace ruy {
25
26// A Path is an implementation path, typically corresponding to a SIMD
// instruction set being targeted. For example, on the ARM architecture,
28// Path::kNeon means using NEON instructions, and Path::kNeonDotprod means
29// also using the newer NEON dot-product instructions.
30//
31// Different Path enum values are defined on different CPU architectures,
32// corresponding to different SIMD ISA extensions available there.
33//
34// Path::kStandardCpp is the one Path that is always available.
35//
36// Path enum values are bits and may be OR-ed to form "sets of Paths".
37// Ruy entry points such as ruy::Mul either implicitly use such a set of Paths,
38// or allow passing an explicit one as a template parameter. The meaning of such
39// an OR-ed Path combination is "compile all of
40// these paths; which path is used will be determined at runtime". This is why
41// for most users, it is enough to call ruy::Mul(...), which will compile a
42// reasonable selection of paths for the target CPU architecture's various
43// SIMD ISA extensions, and let ruy determine at runtime which one to use.
44// Internally, after the actual path has been resolved, ruy's internal functions
45// templatized on a Path tend to require that to be a single bit.
46//
47// An element of ruy's internal design was to allow for code compiled for
48// multiple such paths to coexist without violating the C++ One Definition Rule
49// (ODR). This is achieved by having all ruy internal functions, whose
50// definition depends on a choice of Path, be templatized on a Path, so that
51// each path-specific specialization is a separate symbol. There is never
52// a need to compile ruy code with different compilation flags to enable
53// different SIMD extensions and dispatch at runtime between them, as this is
54// taken care of internally by ruy in an ODR-correct way.
enum class Path : std::uint8_t {
  // Null value: the empty set of paths (no bit set).
  kNone = 0,

  // Implementation of Ruy's architecture-specific parts in standard C++.
  //
  // Meant for testing/development, and used as the fallback whenever the SIMD
  // ISA extensions needed by the other paths are unavailable at runtime.
  kStandardCpp = 1 << 0,

  // Test-only variants of StandardCpp, internal to ruy, that exist to exercise
  // more corners of internal ruy logic.
  // They are deliberately left out of ruy::kAllPaths and ruy::kNonArchPaths;
  // only dedicated ruy tests that reference them explicitly ever use them.
  kInternalStandardCppVariant1 = 1 << 1,
  kInternalStandardCppVariant2 = 1 << 2,
  kInternalStandardCppVariant3 = 1 << 3,

  // Architecture-specific paths start at bit 4. The ARM and x86 groups below
  // use the same bit positions.
  // NOTE(review): this presumably relies on RUY_PLATFORM_ARM and
  // RUY_PLATFORM_X86 never being set together -- confirm in ruy/platform.h.

#if RUY_PLATFORM_ARM
  // Optimized path relying on a widely available subset of ARM NEON
  // instructions.
  kNeon = 1 << 4,
  // Optimized path making use of the ARM NEON dot product instructions found
  // on newer ARM cores.
  kNeonDotprod = 1 << 5,
#endif  // RUY_PLATFORM_ARM

#if RUY_PLATFORM_X86
  // Optimized for AVX.
  // Compiled with -mavx.
  kAvx = 1 << 4,
  // Optimized for AVX2+FMA.
  // Compiled with -mavx2 -mfma.
  kAvx2Fma = 1 << 5,
  // Optimized for AVX-512.
  // Compiled with -mavx512f -mavx512vl -mavx512cd -mavx512bw -mavx512dq.
  kAvx512 = 1 << 6,
#endif  // RUY_PLATFORM_X86
};
91
92inline constexpr Path operator|(Path p, Path q) {
93 return static_cast<Path>(static_cast<std::uint32_t>(p) |
94 static_cast<std::uint32_t>(q));
95}
96
97inline constexpr Path operator&(Path p, Path q) {
98 return static_cast<Path>(static_cast<std::uint32_t>(p) &
99 static_cast<std::uint32_t>(q));
100}
101
102inline constexpr Path operator^(Path p, Path q) {
103 return static_cast<Path>(static_cast<std::uint32_t>(p) ^
104 static_cast<std::uint32_t>(q));
105}
106
107inline constexpr Path operator~(Path p) {
108 return static_cast<Path>(~static_cast<std::uint32_t>(p));
109}
110
111inline constexpr bool Disjoint(Path p, Path q) {
112 return (p & q) == Path::kNone;
113}
114
115inline Path GetMostSignificantPath(Path path_mask) {
116 return static_cast<Path>(round_down_pot(static_cast<int>(path_mask)));
117}
118
119// We define three disjoint sets of paths.
120//
// kNonArchPaths is the set of paths that are defined regardless of
// the CPU architecture, excluding the internal test-only paths (those are
// only part of kNonArchPathsIncludingInternalVariants).
// These paths are slow, but portable. At the moment,
// that is only kStandardCpp. In the past, that used to also include a
// kReference path providing an even more basic implementation, but that has
// been split out into a separate library, see the ReferenceMul function.
constexpr Path kNonArchPaths = Path::kStandardCpp;
128
129// The other two are specific to each CPU architecture. Note that these sets
130// do NOT include a fallback for when none of these architecture paths are
131// supported at runtime by the CPU. For that, see the other constants defined
132// further below.
133//
134// kDefaultArchPaths is the set of architecture-specific paths that
135// we recommend for most users. It is part of kDefaultPaths defined
136// below.
137//
138// kExtraArchPaths is the set of all other architecture-specific paths
139// that for whatever reason we're not recommending to most users at the moment.
140// Typically that would include work-in-progress paths, or paths targeting
141// minority hardware that isn't the best compromise of code size to performance
142// for most users.
143
// Per-architecture selection of kDefaultArchPaths and kExtraArchPaths. The
// first branch whose macro is set wins. kExtraArchPaths is currently empty
// (Path::kNone) in every branch.
// NOTE(review): the NEON branches reference enumerators that are declared
// under RUY_PLATFORM_ARM, so this relies on RUY_PLATFORM_NEON_64/32 implying
// RUY_PLATFORM_ARM -- confirm in ruy/platform.h.
#if RUY_PLATFORM_NEON_64
// RUY_PLATFORM_NEON_64: both kNeon and kNeonDotprod are default paths.
constexpr Path kDefaultArchPaths = Path::kNeon | Path::kNeonDotprod;
constexpr Path kExtraArchPaths = Path::kNone;
#elif RUY_PLATFORM_NEON_32
// RUY_PLATFORM_NEON_32: only kNeon is a default path; kNeonDotprod is in
// neither set.
constexpr Path kDefaultArchPaths = Path::kNeon;
constexpr Path kExtraArchPaths = Path::kNone;
#elif RUY_PLATFORM_X86
// RUY_PLATFORM_X86: all three AVX-family paths are default paths.
constexpr Path kDefaultArchPaths = Path::kAvx | Path::kAvx2Fma | Path::kAvx512;
constexpr Path kExtraArchPaths = Path::kNone;
#else
// Any other platform: no architecture-specific path at all.
constexpr Path kDefaultArchPaths = Path::kNone;
constexpr Path kExtraArchPaths = Path::kNone;
#endif
157
158// kNonArchPathsIncludingInternalVariants is the set of all
159// non-architecture-specific paths without exception. This includes some paths
160// that are internal-only and test-only and not useful to any user.
161static constexpr Path kNonArchPathsIncludingInternalVariants =
162 kNonArchPaths | Path::kInternalStandardCppVariant1 |
163 Path::kInternalStandardCppVariant2 | Path::kInternalStandardCppVariant3;
164
165// Enforce that kDefaultArchPaths, kExtraArchPaths and
166// kNonArchPathsIncludingInternalVariants are mutually disjoint,
167// and that kNonArchPaths is a subset of kNonArchPathsIncludingInternalVariants.
168static_assert(Disjoint(kDefaultArchPaths, kExtraArchPaths), "");
169static_assert(Disjoint(kDefaultArchPaths,
170 kNonArchPathsIncludingInternalVariants),
171 "");
172static_assert(Disjoint(kExtraArchPaths, kNonArchPathsIncludingInternalVariants),
173 "");
174static_assert(Disjoint(kNonArchPaths, ~kNonArchPathsIncludingInternalVariants),
175 "");
176
177// We now define two aggregate sets of paths for convenience, including
178// both architecture-specific paths and some portable fallbacks.
179//
180// kDefaultPaths is the set of paths that we recommend most users to use.
181// It is what ruy::Mul(...), the entry point not taking an explicit Path value,
182// uses.
183constexpr Path kDefaultPaths = Path::kStandardCpp | kDefaultArchPaths;
184
185// kAllPaths is the set of all paths that are available to compile, except
186// some some internal test-only paths that no user would ever want to use.
187// In addition to the Default paths, it also includes the extra
188// architecture paths, as well as any other non-arch path besides kStandardCpp
189// (there is none at the moment).
190constexpr Path kAllPaths = kNonArchPaths | kDefaultArchPaths | kExtraArchPaths;
191
192// kAllPathsIncludingInternalVariants is the set of all paths without exception.
193// This includes some paths that are internal-only and test-only and not useful
194// to any user.
195static constexpr Path kAllPathsIncludingInternalVariants =
196 kAllPaths | kNonArchPathsIncludingInternalVariants;
197
198static_assert(Disjoint(kDefaultPaths, ~kAllPaths), "");
199static_assert(Disjoint(kAllPaths, ~kAllPathsIncludingInternalVariants), "");
200
201} // namespace ruy
202
203#endif // RUY_RUY_PATH_H_
204