1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * This source code is licensed under the BSD-style license found in the |
5 | * LICENSE file in the root directory of this source tree. |
6 | */ |
7 | #pragma once |
8 | #include <cstdint> |
9 | |
10 | namespace fbgemm { |
11 | |
12 | namespace internal { |
13 | |
// Lookup table of 32-bit-granularity masks, intended for initializing an AVX2
// register. Row n (0 <= n <= 8) holds -1 (every bit set) in its first n
// entries and 0 in the remaining 8 - n entries.
// The table is 64-byte aligned; assuming a 4-byte int, each 8-entry row spans
// 32 bytes, so every row begins on a 32-byte boundary.
// clang-format off
alignas(64) static const int avx2_ps_or_epi32_masks[9][8] = {
  // NOTE: the rows are aligned by hand (clang-format would reflow them) so
  // that the growing run of -1 entries is easy to see.
  /* n = 0 */ {  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n = 1 */ { -1,  0,  0,  0,  0,  0,  0,  0 },
  /* n = 2 */ { -1, -1,  0,  0,  0,  0,  0,  0 },
  /* n = 3 */ { -1, -1, -1,  0,  0,  0,  0,  0 },
  /* n = 4 */ { -1, -1, -1, -1,  0,  0,  0,  0 },
  /* n = 5 */ { -1, -1, -1, -1, -1,  0,  0,  0 },
  /* n = 6 */ { -1, -1, -1, -1, -1, -1,  0,  0 },
  /* n = 7 */ { -1, -1, -1, -1, -1, -1, -1,  0 },
  /* n = 8 */ { -1, -1, -1, -1, -1, -1, -1, -1 },
};
30 | |
// Compact alternative to a per-count table: eight -1 entries followed by
// eight 0 entries. The 8 consecutive ints that start at index
// (8 - remainder) % 8 form a mask whose first `remainder` entries are -1 and
// whose remaining entries are 0; remainder == 0 maps to index 0, i.e. all
// eight entries are -1.
// NOTE(review): this array carries no alignas and the start index varies, so
// callers presumably need an unaligned load - confirm at the call sites.
static const int avx2_ps_or_epi32_combined_mask[16] = {
  -1, -1, -1, -1, -1, -1, -1, -1,  // indices 0..7
   0,  0,  0,  0,  0,  0,  0,  0   // indices 8..15
};
35 | |
// Lookup table of 8-bit-granularity masks, intended for initializing an SSE
// register. Row n (0 <= n <= 16) holds -1 (every bit set) in its first n
// bytes and 0 in the remaining 16 - n bytes.
// The table is 64-byte aligned and each row is 16 bytes long, so every row
// begins on a 16-byte boundary.
alignas(64) static const std::int8_t sse_epi8_masks[17][16] = {
  /* n =  0 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  1 */ { -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  2 */ { -1, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  3 */ { -1, -1, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  4 */ { -1, -1, -1, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  5 */ { -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  6 */ { -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  7 */ { -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  8 */ { -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,  0,  0,  0 },
  /* n =  9 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,  0,  0 },
  /* n = 10 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0,  0 },
  /* n = 11 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0,  0 },
  /* n = 12 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0 },
  /* n = 13 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0 },
  /* n = 14 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0 },
  /* n = 15 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0 },
  /* n = 16 */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
};
57 | // clang-format on |
58 | |
59 | } // namespace internal |
60 | |
61 | } // namespace fbgemm |
62 | |