GroupVarint.h source code [glow/thirdparty/folly/folly/GroupVarint.h]

1	/*
2	* Copyright (c) Facebook, Inc. and its affiliates.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	#pragma once
18
19	#include <cstdint>
20	#include <limits>
21
22	#include <glog/logging.h>
23
24	#include <folly/Portability.h>
25	#include <folly/Range.h>
26	#include <folly/detail/GroupVarintDetail.h>
27	#include <folly/lang/Bits.h>
28	#include <folly/portability/Builtins.h>
29
30	#if !defined(__GNUC__) && !defined(_MSC_VER)
31	#error GroupVarint.h requires GCC or MSVC
32	#endif
33
34	#if FOLLY_X64 \|\| defined(__i386__) \|\| FOLLY_PPC64 \|\| FOLLY_AARCH64
35	#define FOLLY_HAVE_GROUP_VARINT 1
36	#else
37	#define FOLLY_HAVE_GROUP_VARINT 0
38	#endif
39
40	#if FOLLY_HAVE_GROUP_VARINT
41
42	#if FOLLY_SSE >= 3
43	#include <nmmintrin.h>
44	namespace folly {
45	namespace detail {
46	extern const std::array<std::array<std::uint32_t, `4`>, `256`> groupVarintSSEMasks;
47	} // namespace detail
48	} // namespace folly
49	#endif
50
51	namespace folly {
52	namespace detail {
53	extern const std::array<std::uint8_t, `256`> groupVarintLengths;
54	} // namespace detail
55	} // namespace folly
56
57	namespace folly {
58
59	template <typename T>
60	class GroupVarint;
61
62	/**
63	* GroupVarint encoding for 32-bit values.
64	*
65	* Encodes 4 32-bit integers at once, each using 1-4 bytes depending on size.
66	* There is one byte of overhead. (The first byte contains the lengths of
67	* the four integers encoded as two bits each; 00=1 byte .. 11=4 bytes)
68	*
69	* This implementation assumes little-endian and does unaligned 32-bit
70	* accesses, so it's basically not portable outside of the x86[_64] world.
71	*/
72	template <>
73	class GroupVarint<uint32_t> : public detail::GroupVarintBase<uint32_t> {
74	public:
75	/**
76	* Return the number of bytes used to encode these four values.
77	*/
78	static size_t size(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
79	return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d);
80	}
81
82	/**
83	* Return the number of bytes used to encode four uint32_t values stored
84	* at consecutive positions in an array.
85	*/
86	static size_t size(const uint32_t* p) {
87	return size(p[`0`], p[`1`], p[`2`], p[`3`]);
88	}
89
90	/**
91	* Return the number of bytes used to encode count (<= 4) values.
92	* If you clip a buffer after these many bytes, you can still decode
93	* the first "count" values correctly (if the remaining size() -
94	* partialSize() bytes are filled with garbage).
95	*/
96	static size_t partialSize(const type* p, size_t count) {
97	DCHECK_LE(count, kGroupSize);
98	size_t s = kHeaderSize + count;
99	for (; count; --count, ++p) {
100	s += key(*p);
101	}
102	return s;
103	}
104
105	/**
106	* Return the number of values from *p that are valid from an encoded
107	* buffer of size bytes.
108	*/
109	static size_t partialCount(const char* p, size_t size) {
110	uint8_t v = uint8_t(*p);
111	size_t s = kHeaderSize;
112	s += `1` + b0key(v);
113	if (s > size) {
114	return `0`;
115	}
116	s += `1` + b1key(v);
117	if (s > size) {
118	return `1`;
119	}
120	s += `1` + b2key(v);
121	if (s > size) {
122	return `2`;
123	}
124	s += `1` + b3key(v);
125	if (s > size) {
126	return `3`;
127	}
128	return `4`;
129	}
130
131	/**
132	* Given a pointer to the beginning of an GroupVarint32-encoded block,
133	* return the number of bytes used by the encoding.
134	*/
135	static size_t encodedSize(const char* p) {
136	return kHeaderSize + kGroupSize + b0key(uint8_t(p)) + b1key(uint8_t(p)) +
137	b2key(uint8_t(p)) + b3key(uint8_t(p));
138	}
139
140	/**
141	* Encode four uint32_t values into the buffer pointed-to by p, and return
142	* the next position in the buffer (that is, one character past the last
143	* encoded byte). p needs to have at least size()+4 bytes available.
144	*/
145	static char* encode(char* p, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
146	uint8_t b0key = key(a);
147	uint8_t b1key = key(b);
148	uint8_t b2key = key(c);
149	uint8_t b3key = key(d);
150	*p++ = (b3key << `6`) \| (b2key << `4`) \| (b1key << `2`) \| b0key;
151	storeUnaligned(p, a);
152	p += b0key + `1`;
153	storeUnaligned(p, b);
154	p += b1key + `1`;
155	storeUnaligned(p, c);
156	p += b2key + `1`;
157	storeUnaligned(p, d);
158	p += b3key + `1`;
159	return p;
160	}
161
162	/**
163	* Encode four uint32_t values from the array pointed-to by src into the
164	* buffer pointed-to by p, similar to encode(p,a,b,c,d) above.
165	*/
166	static char* encode(char* p, const uint32_t* src) {
167	return encode(p, src[`0`], src[`1`], src[`2`], src[`3`]);
168	}
169
170	/**
171	* Decode four uint32_t values from a buffer, and return the next position
172	* in the buffer (that is, one character past the last encoded byte).
173	* The buffer needs to have at least 3 extra bytes available (they
174	* may be read but ignored).
175	*/
176	static const char* decode_simple(
177	const char* p,
178	uint32_t* a,
179	uint32_t* b,
180	uint32_t* c,
181	uint32_t* d) {
182	size_t k = loadUnaligned<uint8_t>(p);
183	const char* end = p + detail::groupVarintLengths [k];
184	++p;
185	size_t k0 = b0key(k);
186	*a = loadUnaligned<uint32_t>(p) & kMask[k0];
187	p += k0 + `1`;
188	size_t k1 = b1key(k);
189	*b = loadUnaligned<uint32_t>(p) & kMask[k1];
190	p += k1 + `1`;
191	size_t k2 = b2key(k);
192	*c = loadUnaligned<uint32_t>(p) & kMask[k2];
193	p += k2 + `1`;
194	size_t k3 = b3key(k);
195	*d = loadUnaligned<uint32_t>(p) & kMask[k3];
196	// p += k3+1;
197	return end;
198	}
199
200	/**
201	* Decode four uint32_t values from a buffer and store them in the array
202	* pointed-to by dest, similar to decode(p,a,b,c,d) above.
203	*/
204	static const char* decode_simple(const char* p, uint32_t* dest) {
205	return decode_simple(p, dest, dest + `1`, dest + `2`, dest + `3`);
206	}
207
208	#if FOLLY_SSE >= 3
209	/**
210	* Just like the non-SSSE3 decode below, but with the additional constraint
211	* that we must be able to read at least 17 bytes from the input pointer, p.
212	*/
213	static const char* decode(const char* p, uint32_t* dest) {
214	uint8_t key = uint8_t(p[`0`]);
215	__m128i val = _mm_loadu_si128((const __m128i*)(p + `1`));
216	__m128i mask =
217	_mm_load_si128((const __m128i*)detail::groupVarintSSEMasks[key].data());
218	__m128i r = _mm_shuffle_epi8(val, mask);
219	_mm_storeu_si128((__m128i*)dest, r);
220	return p + detail::groupVarintLengths[key];
221	}
222
223	/**
224	* Just like decode_simple, but with the additional constraint that
225	* we must be able to read at least 17 bytes from the input pointer, p.
226	*/
227	static const char*
228	decode(const char* p, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) {
229	uint8_t key = uint8_t(p[`0`]);
230	__m128i val = _mm_loadu_si128((const __m128i*)(p + `1`));
231	__m128i mask =
232	_mm_load_si128((const __m128i*)detail::groupVarintSSEMasks[key].data());
233	__m128i r = _mm_shuffle_epi8(val, mask);
234
235	// Extracting 32 bits at a time out of an XMM register is a SSE4 feature
236	#if FOLLY_SSE >= 4
237	*a = uint32_t(_mm_extract_epi32(r, `0`));
238	*b = uint32_t(_mm_extract_epi32(r, `1`));
239	*c = uint32_t(_mm_extract_epi32(r, `2`));
240	*d = uint32_t(_mm_extract_epi32(r, `3`));
241	#else /* !__SSE4__ */
242	*a = _mm_extract_epi16(r, `0`) + (_mm_extract_epi16(r, `1`) << `16`);
243	*b = _mm_extract_epi16(r, `2`) + (_mm_extract_epi16(r, `3`) << `16`);
244	*c = _mm_extract_epi16(r, `4`) + (_mm_extract_epi16(r, `5`) << `16`);
245	*d = _mm_extract_epi16(r, `6`) + (_mm_extract_epi16(r, `7`) << `16`);
246	#endif /* __SSE4__ */
247
248	return p + detail::groupVarintLengths[key];
249	}
250
251	#else /* !__SSSE3__ */
252	static const char*
253	decode(const char* p, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) {
254	return decode_simple(p, a, b, c, d);
255	}
256
257	static const char* decode(const char* p, uint32_t* dest) {
258	return decode_simple(p, dest);
259	}
260	#endif /* __SSSE3__ */
261
262	private:
263	static uint8_t key(uint32_t x) {
264	// __builtin_clz is undefined for the x==0 case
265	return uint8_t(`3` - (__builtin_clz(x \| `1`) / `8`));
266	}
267	static size_t b0key(size_t x) {
268	return x & `3`;
269	}
270	static size_t b1key(size_t x) {
271	return (x >> `2`) & `3`;
272	}
273	static size_t b2key(size_t x) {
274	return (x >> `4`) & `3`;
275	}
276	static size_t b3key(size_t x) {
277	return (x >> `6`) & `3`;
278	}
279
280	static const uint32_t kMask[];
281	};
282
283	/**
284	* GroupVarint encoding for 64-bit values.
285	*
286	* Encodes 5 64-bit integers at once, each using 1-8 bytes depending on size.
287	* There are two bytes of overhead. (The first two bytes contain the lengths
288	* of the five integers encoded as three bits each; 000=1 byte .. 111 = 8 bytes)
289	*
290	* This implementation assumes little-endian and does unaligned 64-bit
291	* accesses, so it's basically not portable outside of the x86[_64] world.
292	*/
293	template <>
294	class GroupVarint<uint64_t> : public detail::GroupVarintBase<uint64_t> {
295	public:
296	/**
297	* Return the number of bytes used to encode these five values.
298	*/
299	static size_t
300	size(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e) {
301	return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d) +
302	key(e);
303	}
304
305	/**
306	* Return the number of bytes used to encode five uint64_t values stored
307	* at consecutive positions in an array.
308	*/
309	static size_t size(const uint64_t* p) {
310	return size(p[`0`], p[`1`], p[`2`], p[`3`], p[`4`]);
311	}
312
313	/**
314	* Return the number of bytes used to encode count (<= 4) values.
315	* If you clip a buffer after these many bytes, you can still decode
316	* the first "count" values correctly (if the remaining size() -
317	* partialSize() bytes are filled with garbage).
318	*/
319	static size_t partialSize(const type* p, size_t count) {
320	DCHECK_LE(count, kGroupSize);
321	size_t s = kHeaderSize + count;
322	for (; count; --count, ++p) {
323	s += key(*p);
324	}
325	return s;
326	}
327
328	/**
329	* Return the number of values from *p that are valid from an encoded
330	* buffer of size bytes.
331	*/
332	static size_t partialCount(const char* p, size_t size) {
333	uint16_t v = loadUnaligned<uint16_t>(p);
334	size_t s = kHeaderSize;
335	s += `1` + b0key(v);
336	if (s > size) {
337	return `0`;
338	}
339	s += `1` + b1key(v);
340	if (s > size) {
341	return `1`;
342	}
343	s += `1` + b2key(v);
344	if (s > size) {
345	return `2`;
346	}
347	s += `1` + b3key(v);
348	if (s > size) {
349	return `3`;
350	}
351	s += `1` + b4key(v);
352	if (s > size) {
353	return `4`;
354	}
355	return `5`;
356	}
357
358	/**
359	* Given a pointer to the beginning of an GroupVarint64-encoded block,
360	* return the number of bytes used by the encoding.
361	*/
362	static size_t encodedSize(const char* p) {
363	uint16_t n = loadUnaligned<uint16_t>(p);
364	return kHeaderSize + kGroupSize + b0key(n) + b1key(n) + b2key(n) +
365	b3key(n) + b4key(n);
366	}
367
368	/**
369	* Encode five uint64_t values into the buffer pointed-to by p, and return
370	* the next position in the buffer (that is, one character past the last
371	* encoded byte). p needs to have at least size()+8 bytes available.
372	*/
373	static char*
374	encode(char* p, uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e) {
375	uint16_t b0key = key(a);
376	uint16_t b1key = key(b);
377	uint16_t b2key = key(c);
378	uint16_t b3key = key(d);
379	uint16_t b4key = key(e);
380	storeUnaligned<uint16_t>(
381	p,
382	uint16_t(
383	(b4key << `12`) \| (b3key << `9`) \| (b2key << `6`) \| (b1key << `3`) \|
384	b0key));
385	p += `2`;
386	storeUnaligned(p, a);
387	p += b0key + `1`;
388	storeUnaligned(p, b);
389	p += b1key + `1`;
390	storeUnaligned(p, c);
391	p += b2key + `1`;
392	storeUnaligned(p, d);
393	p += b3key + `1`;
394	storeUnaligned(p, e);
395	p += b4key + `1`;
396	return p;
397	}
398
399	/**
400	* Encode five uint64_t values from the array pointed-to by src into the
401	* buffer pointed-to by p, similar to encode(p,a,b,c,d,e) above.
402	*/
403	static char* encode(char* p, const uint64_t* src) {
404	return encode(p, src[`0`], src[`1`], src[`2`], src[`3`], src[`4`]);
405	}
406
407	/**
408	* Decode five uint64_t values from a buffer, and return the next position
409	* in the buffer (that is, one character past the last encoded byte).
410	* The buffer needs to have at least 7 bytes available (they may be read
411	* but ignored).
412	*/
413	static const char* decode(
414	const char* p,
415	uint64_t* a,
416	uint64_t* b,
417	uint64_t* c,
418	uint64_t* d,
419	uint64_t* e) {
420	uint16_t k = loadUnaligned<uint16_t>(p);
421	p += `2`;
422	uint8_t k0 = b0key(k);
423	*a = loadUnaligned<uint64_t>(p) & kMask[k0];
424	p += k0 + `1`;
425	uint8_t k1 = b1key(k);
426	*b = loadUnaligned<uint64_t>(p) & kMask[k1];
427	p += k1 + `1`;
428	uint8_t k2 = b2key(k);
429	*c = loadUnaligned<uint64_t>(p) & kMask[k2];
430	p += k2 + `1`;
431	uint8_t k3 = b3key(k);
432	*d = loadUnaligned<uint64_t>(p) & kMask[k3];
433	p += k3 + `1`;
434	uint8_t k4 = b4key(k);
435	*e = loadUnaligned<uint64_t>(p) & kMask[k4];
436	p += k4 + `1`;
437	return p;
438	}
439
440	/**
441	* Decode five uint64_t values from a buffer and store them in the array
442	* pointed-to by dest, similar to decode(p,a,b,c,d,e) above.
443	*/
444	static const char* decode(const char* p, uint64_t* dest) {
445	return decode(p, dest, dest + `1`, dest + `2`, dest + `3`, dest + `4`);
446	}
447
448	private:
449	enum { kHeaderBytes = `2` };
450
451	static uint8_t key(uint64_t x) {
452	// __builtin_clzll is undefined for the x==0 case
453	return uint8_t(`7` - (__builtin_clzll(x \| `1`) / `8`));
454	}
455
456	static uint8_t b0key(uint16_t x) {
457	return x & `7u`;
458	}
459	static uint8_t b1key(uint16_t x) {
460	return (x >> `3`) & `7u`;
461	}
462	static uint8_t b2key(uint16_t x) {
463	return (x >> `6`) & `7u`;
464	}
465	static uint8_t b3key(uint16_t x) {
466	return (x >> `9`) & `7u`;
467	}
468	static uint8_t b4key(uint16_t x) {
469	return (x >> `12`) & `7u`;
470	}
471
472	static const uint64_t kMask[];
473	};
474
475	typedef GroupVarint<uint32_t> GroupVarint32;
476	typedef GroupVarint<uint64_t> GroupVarint64;
477
478	/**
479	* Simplify use of GroupVarint* for the case where data is available one
480	* entry at a time (instead of one group at a time). Handles buffering
481	* and an incomplete last chunk.
482	*
483	* Output is a function object that accepts character ranges:
484	* out(StringPiece) appends the given character range to the output.
485	*/
486	template <class T, class Output>
487	class GroupVarintEncoder {
488	public:
489	typedef GroupVarint<T> Base;
490	typedef T type;
491
492	explicit GroupVarintEncoder(Output out) : out_(out), count_(`0`) {}
493
494	~GroupVarintEncoder() {
495	finish();
496	}
497
498	/**
499	* Add a value to the encoder.
500	*/
501	void add(type val) {
502	buf_[count_++] = val;
503	if (count_ == Base::kGroupSize) {
504	char* p = Base::encode(tmp_, buf_);
505	out_(StringPiece(tmp_, p));
506	count_ = `0`;
507	}
508	}
509
510	/**
511	* Finish encoding, flushing any buffered values if necessary.
512	* After finish(), the encoder is immediately ready to encode more data
513	* to the same output.
514	*/
515	void finish() {
516	if (count_) {
517	// This is not strictly necessary, but it makes testing easy;
518	// uninitialized bytes are guaranteed to be recorded as taking one byte
519	// (not more).
520	for (size_t i = count_; i < Base::kGroupSize; i++) {
521	buf_[i] = `0`;
522	}
523	Base::encode(tmp_, buf_);
524	out_(StringPiece(tmp_, Base::partialSize(buf_, count_)));
525	count_ = `0`;
526	}
527	}
528
529	/**
530	* Return the appender that was used.
531	*/
532	Output& output() {
533	return out_;
534	}
535	const Output& output() const {
536	return out_;
537	}
538
539	/**
540	* Reset the encoder, disregarding any state (except what was already
541	* flushed to the output, of course).
542	*/
543	void clear() {
544	count_ = `0`;
545	}
546
547	private:
548	Output out_;
549	char tmp_[Base::kMaxSize];
550	type buf_[Base::kGroupSize];
551	size_t count_;
552	};
553
554	/**
555	* Simplify use of GroupVarint* for the case where the last group in the
556	* input may be incomplete (but the exact size of the input is known).
557	* Allows for extracting values one at a time.
558	*/
559	template <typename T>
560	class GroupVarintDecoder {
561	public:
562	typedef GroupVarint<T> Base;
563	typedef T type;
564
565	GroupVarintDecoder() = default;
566
567	explicit GroupVarintDecoder(StringPiece data, size_t maxCount = (size_t)-`1`)
568	: rrest_(data.end()),
569	p_(data.data()),
570	end_(data.end()),
571	limit_(end_),
572	pos_(`0`),
573	count_(`0`),
574	remaining_(maxCount) {}
575
576	void reset(StringPiece data, size_t maxCount = (size_t)-`1`) {
577	rrest_ = data.end();
578	p_ = data.data();
579	end_ = data.end();
580	limit_ = end_;
581	pos_ = `0`;
582	count_ = `0`;
583	remaining_ = maxCount;
584	}
585
586	/**
587	* Read and return the next value.
588	*/
589	bool next(type* val) {
590	if (pos_ == count_) {
591	// refill
592	size_t rem = size_t(end_ - p_);
593	if (rem == `0` \|\| remaining_ == `0`) {
594	return false;
595	}
596	// next() attempts to read one full group at a time, and so we must have
597	// at least enough bytes readable after its end to handle the case if the
598	// last group is full.
599	//
600	// The best way to ensure this is to ensure that data has at least
601	// Base::kMaxSize - 1 bytes readable after* the end, otherwise we'll copy*
602	// into a temporary buffer.
603	if (limit_ - p_ < Base::kMaxSize) {
604	memcpy(tmp_, p_, rem);
605	p_ = tmp_;
606	end_ = p_ + rem;
607	limit_ = tmp_ + sizeof(tmp_);
608	}
609	pos_ = `0`;
610	const char* n = Base::decode(p_, buf_);
611	if (n <= end_) {
612	// Full group could be decoded
613	if (remaining_ >= Base::kGroupSize) {
614	remaining_ -= Base::kGroupSize;
615	count_ = Base::kGroupSize;
616	p_ = n;
617	} else {
618	count_ = remaining_;
619	remaining_ = `0`;
620	p_ += Base::partialSize(buf_, count_);
621	}
622	} else {
623	// Can't decode a full group
624	count_ = Base::partialCount(p_, size_t(end_ - p_));
625	if (remaining_ >= count_) {
626	remaining_ -= count_;
627	p_ = end_;
628	} else {
629	count_ = remaining_;
630	remaining_ = `0`;
631	p_ += Base::partialSize(buf_, count_);
632	}
633	if (count_ == `0`) {
634	return false;
635	}
636	}
637	}
638	*val = buf_[pos_++];
639	return true;
640	}
641
642	StringPiece rest() const {
643	// This is only valid after next() returned false
644	CHECK(pos_ == count_ && (p_ == end_ \|\| remaining_ == `0`));
645	// p_ may point to the internal buffer (tmp_), but we want
646	// to return subpiece of the original data
647	size_t size = size_t(end_ - p_);
648	return StringPiece(rrest_ - size, rrest_);
649	}
650
651	private:
652	const char* rrest_;
653	const char* p_;
654	const char* end_;
655	const char* limit_;
656	char tmp_[`2` * Base::kMaxSize];
657	type buf_[Base::kGroupSize];
658	size_t pos_;
659	size_t count_;
660	size_t remaining_;
661	};
662
663	typedef GroupVarintDecoder<uint32_t> GroupVarint32Decoder;
664	typedef GroupVarintDecoder<uint64_t> GroupVarint64Decoder;
665
666	} // namespace folly
667
668	#endif // FOLLY_HAVE_GROUP_VARINT
669

Browse the source code of glow/thirdparty/folly/folly/GroupVarint.h