/* Bit and byte utilities.

   Byte swap functions, which reverse the order of bytes:

   - _Py_bswap16(uint16_t)
   - _Py_bswap32(uint32_t)
   - _Py_bswap64(uint64_t)

   Also provided: _Py_popcount32() (population count) and _Py_bit_length()
   (bit length of an unsigned long).
*/

#ifndef Py_INTERNAL_BITUTILS_H
#define Py_INTERNAL_BITUTILS_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#if defined(__GNUC__) \
      && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)))
   /* __builtin_bswap16() is available since GCC 4.8,
      __builtin_bswap32() is available since GCC 4.3,
      __builtin_bswap64() is available since GCC 4.3. */
# define _PY_HAVE_BUILTIN_BSWAP
#endif

#ifdef _MSC_VER
   /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
# include <intrin.h>
#endif

static inline uint16_t
_Py_bswap16(uint16_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
    return __builtin_bswap16(word);
#elif defined(_MSC_VER)
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
    return _byteswap_ushort(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT16_C(0x00FF)) << 8)
           | ((word & UINT16_C(0xFF00)) >> 8));
#endif
}
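
// Usage sketch (illustrative, assumes a caller with <assert.h> available):
// _Py_bswap16() reverses the two bytes of its argument, e.g.
//
//     assert(_Py_bswap16(UINT16_C(0x1234)) == UINT16_C(0x3412));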

static inline uint32_t
_Py_bswap32(uint32_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
    return __builtin_bswap32(word);
#elif defined(_MSC_VER)
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
    return _byteswap_ulong(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT32_C(0x000000FF)) << 24)
           | ((word & UINT32_C(0x0000FF00)) << 8)
           | ((word & UINT32_C(0x00FF0000)) >> 8)
           | ((word & UINT32_C(0xFF000000)) >> 24));
#endif
}
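
// Usage sketch (illustrative): _Py_bswap32() reverses all four bytes, e.g.
//
//     assert(_Py_bswap32(UINT32_C(0x12345678)) == UINT32_C(0x78563412));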

static inline uint64_t
_Py_bswap64(uint64_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
    return __builtin_bswap64(word);
#elif defined(_MSC_VER)
    return _byteswap_uint64(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
           | ((word & UINT64_C(0x000000000000FF00)) << 40)
           | ((word & UINT64_C(0x0000000000FF0000)) << 24)
           | ((word & UINT64_C(0x00000000FF000000)) << 8)
           | ((word & UINT64_C(0x000000FF00000000)) >> 8)
           | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
           | ((word & UINT64_C(0x00FF000000000000)) >> 40)
           | ((word & UINT64_C(0xFF00000000000000)) >> 56));
#endif
}
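
// Usage sketch (illustrative): _Py_bswap64() reverses all eight bytes, e.g.
//
//     assert(_Py_bswap64(UINT64_C(0x0102030405060708))
//            == UINT64_C(0x0807060504030201));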


// Population count: count the number of 1 bits in 'x'
// (number of bits set to 1), also known as the Hamming weight.
//
// Implementation note: to keep the implementation simple, CPUID is not used
// to test whether the x86 POPCNT instruction can be used. For example, the
// Visual Studio __popcnt() intrinsic is not used for this reason. The clang
// and GCC builtin functions can use the x86 POPCNT instruction if the target
// architecture has SSE4a or newer.
static inline int
_Py_popcount32(uint32_t x)
{
#if (defined(__clang__) || defined(__GNUC__))

#if SIZEOF_INT >= 4
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
    return __builtin_popcount(x);
#else
    // The C standard guarantees that unsigned long will always be big enough
    // to hold a uint32_t value without losing information.
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
    return __builtin_popcountl(x);
#endif

#else
    // 32-bit SWAR (SIMD Within A Register) popcount

    // Binary: 0 1 0 1 ...
    const uint32_t M1 = 0x55555555;
    // Binary: 00 11 00 11 ...
    const uint32_t M2 = 0x33333333;
    // Binary: 0000 1111 0000 1111 ...
    const uint32_t M4 = 0x0F0F0F0F;
    // 256**3 + 256**2 + 256**1 + 256**0
    const uint32_t SUM = 0x01010101;

    // Put count of each 2 bits into those 2 bits
    x = x - ((x >> 1) & M1);
    // Put count of each 4 bits into those 4 bits
    x = (x & M2) + ((x >> 2) & M2);
    // Put count of each 8 bits into those 8 bits
    x = (x + (x >> 4)) & M4;
    // Sum of the 4 byte counts: the multiply adds all four byte counts into
    // the most significant byte, which the shift by 24 then extracts.
    return (uint32_t)((uint64_t)x * (uint64_t)SUM) >> 24;
#endif
}
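
// Usage sketch (illustrative): the result depends only on how many bits are
// set in the argument, not on their positions, e.g.
//
//     assert(_Py_popcount32(0) == 0);
//     assert(_Py_popcount32(UINT32_C(0x0000000B)) == 3);    // 0b1011
//     assert(_Py_popcount32(UINT32_C(0xFFFFFFFF)) == 32);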


// Return the bit length of 'x': the smallest integer k such that x < 2**k.
// This is one more than the 0-based index of the most significant 1 bit.
// Equivalent to floor(log2(x)) + 1 for x != 0, and to 0 for x == 0.
static inline int
_Py_bit_length(unsigned long x)
{
#if (defined(__clang__) || defined(__GNUC__))
    if (x != 0) {
        // __builtin_clzl() is available since GCC 3.4.
        // Undefined behavior for x == 0.
        return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
    }
    else {
        return 0;
    }
#elif defined(_MSC_VER)
    // _BitScanReverse() is documented to search 32 bits.
    Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
    unsigned long msb;
    if (_BitScanReverse(&msb, x)) {
        return (int)msb + 1;
    }
    else {
        return 0;
    }
#else
    const int BIT_LENGTH_TABLE[32] = {
        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
    };
    int msb = 0;
    // Shift out 6 bits at a time, adding 6 to the result for each step,
    // until the remaining value fits in the 32-entry lookup table.
    while (x >= 32) {
        msb += 6;
        x >>= 6;
    }
    msb += BIT_LENGTH_TABLE[x];
    return msb;
#endif
}
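
// Usage sketch (illustrative): the bit length of 0 is 0, and for x != 0 it is
// the position of the highest 1 bit plus one, e.g.
//
//     assert(_Py_bit_length(0) == 0);
//     assert(_Py_bit_length(1) == 1);
//     assert(_Py_bit_length(0x1234) == 13);    // 0x1234 < 0x2000 == 2**13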


#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_BITUTILS_H */