pycore_bitutils.h source code [python/Include/internal/pycore_bitutils.h]

1	/ Bit and bytes utilities.*
2
3	Bytes swap functions, reverse order of bytes:
4
5	- _Py_bswap16(uint16_t)
6	- _Py_bswap32(uint32_t)
7	- _Py_bswap64(uint64_t)
8	*/
9
10	#ifndef Py_INTERNAL_BITUTILS_H
11	#define Py_INTERNAL_BITUTILS_H
12	#ifdef __cplusplus
13	extern "C" {
14	#endif
15
16	#ifndef Py_BUILD_CORE
17	# error "this header requires Py_BUILD_CORE define"
18	#endif
19
20	#if defined(__GNUC__) \
21	&& ((__GNUC__ >= 5) \|\| (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
22	/ __builtin_bswap16() is available since GCC 4.8,*
23	__builtin_bswap32() is available since GCC 4.3,
24	__builtin_bswap64() is available since GCC 4.3. /*
25	# define _PY_HAVE_BUILTIN_BSWAP
26	#endif
27
28	#ifdef _MSC_VER
29	/ Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() /
30	# include <intrin.h>
31	#endif
32
33	static inline uint16_t
34	_Py_bswap16(uint16_t word)
35	{
36	#if defined(_PY_HAVE_BUILTIN_BSWAP) \|\| _Py__has_builtin(__builtin_bswap16)
37	return __builtin_bswap16(word);
38	#elif defined(_MSC_VER)
39	Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
40	return _byteswap_ushort(word);
41	#else
42	// Portable implementation which doesn't rely on circular bit shift
43	return ( ((word & UINT16_C(`0x00FF`)) << `8`)
44	\| ((word & UINT16_C(`0xFF00`)) >> `8`));
45	#endif
46	}
47
48	static inline uint32_t
49	_Py_bswap32(uint32_t word)
50	{
51	#if defined(_PY_HAVE_BUILTIN_BSWAP) \|\| _Py__has_builtin(__builtin_bswap32)
52	return __builtin_bswap32(word);
53	#elif defined(_MSC_VER)
54	Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
55	return _byteswap_ulong(word);
56	#else
57	// Portable implementation which doesn't rely on circular bit shift
58	return ( ((word & UINT32_C(`0x000000FF`)) << `24`)
59	\| ((word & UINT32_C(`0x0000FF00`)) << `8`)
60	\| ((word & UINT32_C(`0x00FF0000`)) >> `8`)
61	\| ((word & UINT32_C(`0xFF000000`)) >> `24`));
62	#endif
63	}
64
65	static inline uint64_t
66	_Py_bswap64(uint64_t word)
67	{
68	#if defined(_PY_HAVE_BUILTIN_BSWAP) \|\| _Py__has_builtin(__builtin_bswap64)
69	return __builtin_bswap64(word);
70	#elif defined(_MSC_VER)
71	return _byteswap_uint64(word);
72	#else
73	// Portable implementation which doesn't rely on circular bit shift
74	return ( ((word & UINT64_C(`0x00000000000000FF`)) << `56`)
75	\| ((word & UINT64_C(`0x000000000000FF00`)) << `40`)
76	\| ((word & UINT64_C(`0x0000000000FF0000`)) << `24`)
77	\| ((word & UINT64_C(`0x00000000FF000000`)) << `8`)
78	\| ((word & UINT64_C(`0x000000FF00000000`)) >> `8`)
79	\| ((word & UINT64_C(`0x0000FF0000000000`)) >> `24`)
80	\| ((word & UINT64_C(`0x00FF000000000000`)) >> `40`)
81	\| ((word & UINT64_C(`0xFF00000000000000`)) >> `56`));
82	#endif
83	}
84
85
86	// Population count: count the number of 1's in 'x'
87	// (number of bits set to 1), also known as the hamming weight.
88	//
89	// Implementation note. CPUID is not used, to test if x86 POPCNT instruction
90	// can be used, to keep the implementation simple. For example, Visual Studio
91	// __popcnt() is not used this reason. The clang and GCC builtin function can
92	// use the x86 POPCNT instruction if the target architecture has SSE4a or
93	// newer.
94	static inline int
95	_Py_popcount32(uint32_t x)
96	{
97	#if (defined(__clang__) \|\| defined(__GNUC__))
98
99	#if SIZEOF_INT >= 4
100	Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
101	return __builtin_popcount(x);
102	#else
103	// The C standard guarantees that unsigned long will always be big enough
104	// to hold a uint32_t value without losing information.
105	Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
106	return __builtin_popcountl(x);
107	#endif
108
109	#else
110	// 32-bit SWAR (SIMD Within A Register) popcount
111
112	// Binary: 0 1 0 1 ...
113	const uint32_t M1 = `0x55555555`;
114	// Binary: 00 11 00 11. ..
115	const uint32_t M2 = `0x33333333`;
116	// Binary: 0000 1111 0000 1111 ...
117	const uint32_t M4 = `0x0F0F0F0F`;
118	// 2564 + 2563 + 2562 + 2561
119	const uint32_t SUM = `0x01010101`;
120
121	// Put count of each 2 bits into those 2 bits
122	x = x - ((x >> `1`) & M1);
123	// Put count of each 4 bits into those 4 bits
124	x = (x & M2) + ((x >> `2`) & M2);
125	// Put count of each 8 bits into those 8 bits
126	x = (x + (x >> `4`)) & M4;
127	// Sum of the 4 byte counts
128	return (uint32_t)((uint64_t)x * (uint64_t)SUM) >> `24`;
129	#endif
130	}
131
132
133	// Return the index of the most significant 1 bit in 'x'. This is the smallest
134	// integer k such that x < 2k. Equivalent to floor(log2(x)) + 1 for x != 0.
135	static inline int
136	_Py_bit_length(unsigned long x)
137	{
138	#if (defined(__clang__) \|\| defined(__GNUC__))
139	if (x != `0`) {
140	// __builtin_clzl() is available since GCC 3.4.
141	// Undefined behavior for x == 0.
142	return (int)sizeof(unsigned long) * `8` - __builtin_clzl(x);
143	}
144	else {
145	return `0`;
146	}
147	#elif defined(_MSC_VER)
148	// _BitScanReverse() is documented to search 32 bits.
149	Py_BUILD_ASSERT(sizeof(unsigned long) <= `4`);
150	unsigned long msb;
151	if (_BitScanReverse(&msb, x)) {
152	return (int)msb + `1`;
153	}
154	else {
155	return `0`;
156	}
157	#else
158	const int BIT_LENGTH_TABLE[`32`] = {
159	`0`, `1`, `2`, `2`, `3`, `3`, `3`, `3`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
160	`5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`
161	};
162	int msb = `0`;
163	while (x >= `32`) {
164	msb += `6`;
165	x >>= `6`;
166	}
167	msb += BIT_LENGTH_TABLE[x];
168	return msb;
169	#endif
170	}
171
172
173	#ifdef __cplusplus
174	}
175	#endif
176	#endif /* !Py_INTERNAL_BITUTILS_H */
177

Browse the source code of python/Include/internal/pycore_bitutils.h