1//===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_TWINE_H
10#define LLVM_ADT_TWINE_H
11
12#include "llvm/ADT/SmallVector.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/Support/ErrorHandling.h"
15#include <cassert>
16#include <cstdint>
17#include <string>
18#if __cplusplus > 201402L
19#include <string_view>
20#endif
21
22namespace llvm {
23
24 class formatv_object_base;
25 class raw_ostream;
26
27 /// Twine - A lightweight data structure for efficiently representing the
28 /// concatenation of temporary values as strings.
29 ///
30 /// A Twine is a kind of rope, it represents a concatenated string using a
31 /// binary-tree, where the string is the preorder of the nodes. Since the
32 /// Twine can be efficiently rendered into a buffer when its result is used,
33 /// it avoids the cost of generating temporary values for intermediate string
34 /// results -- particularly in cases when the Twine result is never
35 /// required. By explicitly tracking the type of leaf nodes, we can also avoid
36 /// the creation of temporary strings for conversions operations (such as
37 /// appending an integer to a string).
38 ///
39 /// A Twine is not intended for use directly and should not be stored, its
40 /// implementation relies on the ability to store pointers to temporary stack
41 /// objects which may be deallocated at the end of a statement. Twines should
42 /// only be used accepted as const references in arguments, when an API wishes
43 /// to accept possibly-concatenated strings.
44 ///
45 /// Twines support a special 'null' value, which always concatenates to form
46 /// itself, and renders as an empty string. This can be returned from APIs to
47 /// effectively nullify any concatenations performed on the result.
48 ///
49 /// \b Implementation
50 ///
51 /// Given the nature of a Twine, it is not possible for the Twine's
52 /// concatenation method to construct interior nodes; the result must be
53 /// represented inside the returned value. For this reason a Twine object
54 /// actually holds two values, the left- and right-hand sides of a
55 /// concatenation. We also have nullary Twine objects, which are effectively
56 /// sentinel values that represent empty strings.
57 ///
58 /// Thus, a Twine can effectively have zero, one, or two children. The \see
59 /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
60 /// testing the number of children.
61 ///
62 /// We maintain a number of invariants on Twine objects (FIXME: Why):
63 /// - Nullary twines are always represented with their Kind on the left-hand
64 /// side, and the Empty kind on the right-hand side.
65 /// - Unary twines are always represented with the value on the left-hand
66 /// side, and the Empty kind on the right-hand side.
67 /// - If a Twine has another Twine as a child, that child should always be
68 /// binary (otherwise it could have been folded into the parent).
69 ///
70 /// These invariants are check by \see isValid().
71 ///
72 /// \b Efficiency Considerations
73 ///
74 /// The Twine is designed to yield efficient and small code for common
75 /// situations. For this reason, the concat() method is inlined so that
76 /// concatenations of leaf nodes can be optimized into stores directly into a
77 /// single stack allocated object.
78 ///
79 /// In practice, not all compilers can be trusted to optimize concat() fully,
80 /// so we provide two additional methods (and accompanying operator+
81 /// overloads) to guarantee that particularly important cases (cstring plus
82 /// StringRef) codegen as desired.
83 class Twine {
84 /// NodeKind - Represent the type of an argument.
85 enum NodeKind : unsigned char {
86 /// An empty string; the result of concatenating anything with it is also
87 /// empty.
88 NullKind,
89
90 /// The empty string.
91 EmptyKind,
92
93 /// A pointer to a Twine instance.
94 TwineKind,
95
96 /// A pointer to a C string instance.
97 CStringKind,
98
99 /// A pointer to an std::string instance.
100 StdStringKind,
101
102 /// A Pointer and Length representation. Used for std::string_view,
103 /// StringRef, and SmallString. Can't use a StringRef here
104 /// because they are not trivally constructible.
105 PtrAndLengthKind,
106
107 /// A pointer to a formatv_object_base instance.
108 FormatvObjectKind,
109
110 /// A char value, to render as a character.
111 CharKind,
112
113 /// An unsigned int value, to render as an unsigned decimal integer.
114 DecUIKind,
115
116 /// An int value, to render as a signed decimal integer.
117 DecIKind,
118
119 /// A pointer to an unsigned long value, to render as an unsigned decimal
120 /// integer.
121 DecULKind,
122
123 /// A pointer to a long value, to render as a signed decimal integer.
124 DecLKind,
125
126 /// A pointer to an unsigned long long value, to render as an unsigned
127 /// decimal integer.
128 DecULLKind,
129
130 /// A pointer to a long long value, to render as a signed decimal integer.
131 DecLLKind,
132
133 /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
134 /// integer.
135 UHexKind
136 };
137
138 union Child
139 {
140 const Twine *twine;
141 const char *cString;
142 const std::string *stdString;
143 struct {
144 const char *ptr;
145 size_t length;
146 } ptrAndLength;
147 const formatv_object_base *formatvObject;
148 char character;
149 unsigned int decUI;
150 int decI;
151 const unsigned long *decUL;
152 const long *decL;
153 const unsigned long long *decULL;
154 const long long *decLL;
155 const uint64_t *uHex;
156 };
157
158 /// LHS - The prefix in the concatenation, which may be uninitialized for
159 /// Null or Empty kinds.
160 Child LHS;
161
162 /// RHS - The suffix in the concatenation, which may be uninitialized for
163 /// Null or Empty kinds.
164 Child RHS;
165
166 /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
167 NodeKind LHSKind = EmptyKind;
168
169 /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
170 NodeKind RHSKind = EmptyKind;
171
172 /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
173 explicit Twine(NodeKind Kind) : LHSKind(Kind) {
174 assert(isNullary() && "Invalid kind!");
175 }
176
177 /// Construct a binary twine.
178 explicit Twine(const Twine &LHS, const Twine &RHS)
179 : LHSKind(TwineKind), RHSKind(TwineKind) {
180 this->LHS.twine = &LHS;
181 this->RHS.twine = &RHS;
182 assert(isValid() && "Invalid twine!");
183 }
184
185 /// Construct a twine from explicit values.
186 explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
187 : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
188 assert(isValid() && "Invalid twine!");
189 }
190
191 /// Check for the null twine.
192 bool isNull() const {
193 return getLHSKind() == NullKind;
194 }
195
196 /// Check for the empty twine.
197 bool isEmpty() const {
198 return getLHSKind() == EmptyKind;
199 }
200
201 /// Check if this is a nullary twine (null or empty).
202 bool isNullary() const {
203 return isNull() || isEmpty();
204 }
205
206 /// Check if this is a unary twine.
207 bool isUnary() const {
208 return getRHSKind() == EmptyKind && !isNullary();
209 }
210
211 /// Check if this is a binary twine.
212 bool isBinary() const {
213 return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
214 }
215
216 /// Check if this is a valid twine (satisfying the invariants on
217 /// order and number of arguments).
218 bool isValid() const {
219 // Nullary twines always have Empty on the RHS.
220 if (isNullary() && getRHSKind() != EmptyKind)
221 return false;
222
223 // Null should never appear on the RHS.
224 if (getRHSKind() == NullKind)
225 return false;
226
227 // The RHS cannot be non-empty if the LHS is empty.
228 if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
229 return false;
230
231 // A twine child should always be binary.
232 if (getLHSKind() == TwineKind &&
233 !LHS.twine->isBinary())
234 return false;
235 if (getRHSKind() == TwineKind &&
236 !RHS.twine->isBinary())
237 return false;
238
239 return true;
240 }
241
242 /// Get the NodeKind of the left-hand side.
243 NodeKind getLHSKind() const { return LHSKind; }
244
245 /// Get the NodeKind of the right-hand side.
246 NodeKind getRHSKind() const { return RHSKind; }
247
248 /// Print one child from a twine.
249 void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
250
251 /// Print the representation of one child from a twine.
252 void printOneChildRepr(raw_ostream &OS, Child Ptr,
253 NodeKind Kind) const;
254
255 public:
256 /// @name Constructors
257 /// @{
258
259 /// Construct from an empty string.
260 /*implicit*/ Twine() {
261 assert(isValid() && "Invalid twine!");
262 }
263
264 Twine(const Twine &) = default;
265
266 /// Construct from a C string.
267 ///
268 /// We take care here to optimize "" into the empty twine -- this will be
269 /// optimized out for string constants. This allows Twine arguments have
270 /// default "" values, without introducing unnecessary string constants.
271 /*implicit*/ Twine(const char *Str) {
272 if (Str[0] != '\0') {
273 LHS.cString = Str;
274 LHSKind = CStringKind;
275 } else
276 LHSKind = EmptyKind;
277
278 assert(isValid() && "Invalid twine!");
279 }
280 /// Delete the implicit conversion from nullptr as Twine(const char *)
281 /// cannot take nullptr.
282 /*implicit*/ Twine(std::nullptr_t) = delete;
283
284 /// Construct from an std::string.
285 /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
286 LHS.stdString = &Str;
287 assert(isValid() && "Invalid twine!");
288 }
289
290#if __cplusplus > 201402L
291 /// Construct from an std::string_view by converting it to a pointer and
292 /// length. This handles string_views on a pure API basis, and avoids
293 /// storing one (or a pointer to one) inside a Twine, which avoids problems
294 /// when mixing code compiled under various C++ standards.
295 /*implicit*/ Twine(const std::string_view &Str)
296 : LHSKind(PtrAndLengthKind) {
297 LHS.ptrAndLength.ptr = Str.data();
298 LHS.ptrAndLength.length = Str.length();
299 assert(isValid() && "Invalid twine!");
300 }
301#endif
302
303 /// Construct from a StringRef.
304 /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) {
305 LHS.ptrAndLength.ptr = Str.data();
306 LHS.ptrAndLength.length = Str.size();
307 assert(isValid() && "Invalid twine!");
308 }
309
310 /// Construct from a SmallString.
311 /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
312 : LHSKind(PtrAndLengthKind) {
313 LHS.ptrAndLength.ptr = Str.data();
314 LHS.ptrAndLength.length = Str.size();
315 assert(isValid() && "Invalid twine!");
316 }
317
318 /// Construct from a formatv_object_base.
319 /*implicit*/ Twine(const formatv_object_base &Fmt)
320 : LHSKind(FormatvObjectKind) {
321 LHS.formatvObject = &Fmt;
322 assert(isValid() && "Invalid twine!");
323 }
324
325 /// Construct from a char.
326 explicit Twine(char Val) : LHSKind(CharKind) {
327 LHS.character = Val;
328 }
329
330 /// Construct from a signed char.
331 explicit Twine(signed char Val) : LHSKind(CharKind) {
332 LHS.character = static_cast<char>(Val);
333 }
334
335 /// Construct from an unsigned char.
336 explicit Twine(unsigned char Val) : LHSKind(CharKind) {
337 LHS.character = static_cast<char>(Val);
338 }
339
340 /// Construct a twine to print \p Val as an unsigned decimal integer.
341 explicit Twine(unsigned Val) : LHSKind(DecUIKind) {
342 LHS.decUI = Val;
343 }
344
345 /// Construct a twine to print \p Val as a signed decimal integer.
346 explicit Twine(int Val) : LHSKind(DecIKind) {
347 LHS.decI = Val;
348 }
349
350 /// Construct a twine to print \p Val as an unsigned decimal integer.
351 explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) {
352 LHS.decUL = &Val;
353 }
354
355 /// Construct a twine to print \p Val as a signed decimal integer.
356 explicit Twine(const long &Val) : LHSKind(DecLKind) {
357 LHS.decL = &Val;
358 }
359
360 /// Construct a twine to print \p Val as an unsigned decimal integer.
361 explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) {
362 LHS.decULL = &Val;
363 }
364
365 /// Construct a twine to print \p Val as a signed decimal integer.
366 explicit Twine(const long long &Val) : LHSKind(DecLLKind) {
367 LHS.decLL = &Val;
368 }
369
370 // FIXME: Unfortunately, to make sure this is as efficient as possible we
371 // need extra binary constructors from particular types. We can't rely on
372 // the compiler to be smart enough to fold operator+()/concat() down to the
373 // right thing. Yet.
374
375 /// Construct as the concatenation of a C string and a StringRef.
376 /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
377 : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) {
378 this->LHS.cString = LHS;
379 this->RHS.ptrAndLength.ptr = RHS.data();
380 this->RHS.ptrAndLength.length = RHS.size();
381 assert(isValid() && "Invalid twine!");
382 }
383
384 /// Construct as the concatenation of a StringRef and a C string.
385 /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
386 : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) {
387 this->LHS.ptrAndLength.ptr = LHS.data();
388 this->LHS.ptrAndLength.length = LHS.size();
389 this->RHS.cString = RHS;
390 assert(isValid() && "Invalid twine!");
391 }
392
393 /// Since the intended use of twines is as temporary objects, assignments
394 /// when concatenating might cause undefined behavior or stack corruptions
395 Twine &operator=(const Twine &) = delete;
396
397 /// Create a 'null' string, which is an empty string that always
398 /// concatenates to form another empty string.
399 static Twine createNull() {
400 return Twine(NullKind);
401 }
402
403 /// @}
404 /// @name Numeric Conversions
405 /// @{
406
407 // Construct a twine to print \p Val as an unsigned hexadecimal integer.
408 static Twine utohexstr(const uint64_t &Val) {
409 Child LHS, RHS;
410 LHS.uHex = &Val;
411 RHS.twine = nullptr;
412 return Twine(LHS, UHexKind, RHS, EmptyKind);
413 }
414
415 /// @}
416 /// @name Predicate Operations
417 /// @{
418
419 /// Check if this twine is trivially empty; a false return value does not
420 /// necessarily mean the twine is empty.
421 bool isTriviallyEmpty() const {
422 return isNullary();
423 }
424
425 /// Return true if this twine can be dynamically accessed as a single
426 /// StringRef value with getSingleStringRef().
427 bool isSingleStringRef() const {
428 if (getRHSKind() != EmptyKind) return false;
429
430 switch (getLHSKind()) {
431 case EmptyKind:
432 case CStringKind:
433 case StdStringKind:
434 case PtrAndLengthKind:
435 return true;
436 default:
437 return false;
438 }
439 }
440
441 /// @}
442 /// @name String Operations
443 /// @{
444
445 Twine concat(const Twine &Suffix) const;
446
447 /// @}
448 /// @name Output & Conversion.
449 /// @{
450
451 /// Return the twine contents as a std::string.
452 std::string str() const;
453
454 /// Append the concatenated string into the given SmallString or SmallVector.
455 void toVector(SmallVectorImpl<char> &Out) const;
456
457 /// This returns the twine as a single StringRef. This method is only valid
458 /// if isSingleStringRef() is true.
459 StringRef getSingleStringRef() const {
460 assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
461 switch (getLHSKind()) {
462 default: llvm_unreachable("Out of sync with isSingleStringRef");
463 case EmptyKind:
464 return StringRef();
465 case CStringKind:
466 return StringRef(LHS.cString);
467 case StdStringKind:
468 return StringRef(*LHS.stdString);
469 case PtrAndLengthKind:
470 return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length);
471 }
472 }
473
474 /// This returns the twine as a single StringRef if it can be
475 /// represented as such. Otherwise the twine is written into the given
476 /// SmallVector and a StringRef to the SmallVector's data is returned.
477 StringRef toStringRef(SmallVectorImpl<char> &Out) const {
478 if (isSingleStringRef())
479 return getSingleStringRef();
480 toVector(Out);
481 return StringRef(Out.data(), Out.size());
482 }
483
484 /// This returns the twine as a single null terminated StringRef if it
485 /// can be represented as such. Otherwise the twine is written into the
486 /// given SmallVector and a StringRef to the SmallVector's data is returned.
487 ///
488 /// The returned StringRef's size does not include the null terminator.
489 StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
490
491 /// Write the concatenated string represented by this twine to the
492 /// stream \p OS.
493 void print(raw_ostream &OS) const;
494
495 /// Dump the concatenated string represented by this twine to stderr.
496 void dump() const;
497
498 /// Write the representation of this twine to the stream \p OS.
499 void printRepr(raw_ostream &OS) const;
500
501 /// Dump the representation of this twine to stderr.
502 void dumpRepr() const;
503
504 /// @}
505 };
506
507 /// @name Twine Inline Implementations
508 /// @{
509
510 inline Twine Twine::concat(const Twine &Suffix) const {
511 // Concatenation with null is null.
512 if (isNull() || Suffix.isNull())
513 return Twine(NullKind);
514
515 // Concatenation with empty yields the other side.
516 if (isEmpty())
517 return Suffix;
518 if (Suffix.isEmpty())
519 return *this;
520
521 // Otherwise we need to create a new node, taking care to fold in unary
522 // twines.
523 Child NewLHS, NewRHS;
524 NewLHS.twine = this;
525 NewRHS.twine = &Suffix;
526 NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
527 if (isUnary()) {
528 NewLHS = LHS;
529 NewLHSKind = getLHSKind();
530 }
531 if (Suffix.isUnary()) {
532 NewRHS = Suffix.LHS;
533 NewRHSKind = Suffix.getLHSKind();
534 }
535
536 return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
537 }
538
539 inline Twine operator+(const Twine &LHS, const Twine &RHS) {
540 return LHS.concat(RHS);
541 }
542
543 /// Additional overload to guarantee simplified codegen; this is equivalent to
544 /// concat().
545
546 inline Twine operator+(const char *LHS, const StringRef &RHS) {
547 return Twine(LHS, RHS);
548 }
549
550 /// Additional overload to guarantee simplified codegen; this is equivalent to
551 /// concat().
552
553 inline Twine operator+(const StringRef &LHS, const char *RHS) {
554 return Twine(LHS, RHS);
555 }
556
557 inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
558 RHS.print(OS);
559 return OS;
560 }
561
562 /// @}
563
564} // end namespace llvm
565
566#endif // LLVM_ADT_TWINE_H
567