1/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_TSL_PLATFORM_TSTRING_H_
17#define TENSORFLOW_TSL_PLATFORM_TSTRING_H_
18
19#include <assert.h>
20
21#include <ostream>
22#include <string>
23
24#include "tensorflow/tsl/platform/cord.h"
25#include "tensorflow/tsl/platform/ctstring.h"
26#include "tensorflow/tsl/platform/stringpiece.h"
27
28namespace tsl {
29
30// tensorflow::tstring is the scalar type for DT_STRING tensors.
31//
32// tstrings are meant to be used when interfacing with string tensors, and
33// should not be considered as a general replacement for std::string in
34// tensorflow. The primary purpose of tstring is to provide a unified and
35// stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating
36// unnecessary conversions across language boundaries, and allowing for compiler
37// agnostic interoperability across dynamically loaded modules.
38//
// In addition to ABI stability, tstring features two string subtypes, VIEW and
// OFFSET.
41//
42// VIEW tstrings are views into unowned character buffers; they can be used to
43// pass around existing character strings without incurring a per object heap
44// allocation. Note that, like std::string_view, it is the user's
45// responsibility to ensure that the underlying buffer of a VIEW tstring exceeds
46// the lifetime of the associated tstring object.
47//
48// TODO(dero): Methods for creating OFFSET tensors are not currently
49// implemented.
50//
// OFFSET tstrings are platform-independent, offset-defined strings which can be
// directly mmapped or copied into a tensor buffer without the need for
53// deserialization or processing. For security reasons, it is imperative that
54// OFFSET based string tensors are validated before use, or are from a trusted
55// source.
56//
57// Underlying VIEW and OFFSET buffers are considered immutable, so l-value
58// assignment, mutation, or non-const access to data() of tstrings will result
59// in the conversion to an owned SMALL/LARGE type.
60//
61// The interface for tstring largely overlaps with std::string. Except where
62// noted, expect equivalent semantics with synonymous std::string methods.
63class tstring {
64 TF_TString tstr_;
65
66 public:
67 enum Type {
68 // See cstring.h
69 SMALL = TF_TSTR_SMALL,
70 LARGE = TF_TSTR_LARGE,
71 OFFSET = TF_TSTR_OFFSET,
72 VIEW = TF_TSTR_VIEW,
73 };
74
75 // Assignment to a tstring object with a tstring::view type will create a VIEW
76 // type tstring.
77 class view {
78 const char* data_;
79 size_t size_;
80
81 public:
82 explicit view(const char* data, size_t size) : data_(data), size_(size) {}
83 explicit view(const char* data) : data_(data), size_(::strlen(data)) {}
84
85 const char* data() const { return data_; }
86
87 size_t size() const { return size_; }
88
89 view() = delete;
90 view(const view&) = delete;
91 view& operator=(const view&) = delete;
92 };
93
94 typedef const char* const_iterator;
95
96 // Ctor
97 tstring();
98 tstring(const std::string& str); // NOLINT TODO(b/147740521): Make explicit.
99 tstring(const char* str, size_t len);
100 tstring(const char* str); // NOLINT TODO(b/147740521): Make explicit.
101 tstring(size_t n, char c);
102 explicit tstring(const StringPiece str);
103#ifdef PLATFORM_GOOGLE
104 explicit tstring(const absl::Cord& cord);
105#endif // PLATFORM_GOOGLE
106
107 // Copy
108 tstring(const tstring& str);
109
110 // Move
111 tstring(tstring&& str) noexcept;
112
113 // Dtor
114 ~tstring();
115
116 // Copy Assignment
117 tstring& operator=(const tstring& str);
118 tstring& operator=(const std::string& str);
119 tstring& operator=(const char* str);
120 tstring& operator=(char ch);
121 tstring& operator=(const StringPiece str);
122#ifdef PLATFORM_GOOGLE
123 tstring& operator=(const absl::Cord& cord);
124#endif // PLATFORM_GOOGLE
125
126 // View Assignment
127 tstring& operator=(const view& tsv);
128
129 // Move Assignment
130 tstring& operator=(tstring&& str);
131
132 // Comparison
133 int compare(const char* str, size_t len) const;
134 bool operator<(const tstring& o) const;
135 bool operator>(const tstring& o) const;
136 bool operator==(const char* str) const;
137 bool operator==(const tstring& o) const;
138 bool operator!=(const char* str) const;
139 bool operator!=(const tstring& o) const;
140
141 // Conversion Operators
142 // TODO(b/147740521): Make explicit.
143 operator std::string() const; // NOLINT
144 // TODO(b/147740521): Make explicit.
145 operator StringPiece() const; // NOLINT
146#ifdef PLATFORM_GOOGLE
147 template <typename T,
148 typename std::enable_if<std::is_same<T, absl::AlphaNum>::value,
149 T>::type* = nullptr>
150 operator T() const; // NOLINT TODO(b/147740521): Remove.
151#endif // PLATFORM_GOOGLE
152
153 // Attributes
154 size_t size() const;
155 size_t length() const;
156 size_t capacity() const;
157 bool empty() const;
158 Type type() const;
159
160 // Allocation
161 void resize(size_t new_size, char c = 0);
162 // Similar to resize, but will leave the newly grown region uninitialized.
163 void resize_uninitialized(size_t new_size);
164 void clear() noexcept;
165 void reserve(size_t n);
166
167 // Iterators
168 const_iterator begin() const;
169 const_iterator end() const;
170
171 // Const Element Access
172 const char* c_str() const;
173 const char* data() const;
174 const char& operator[](size_t i) const;
175 const char& back() const;
176
177 // Mutable Element Access
178 // NOTE: For VIEW/OFFSET types, calling these methods will result in the
179 // conversion to a SMALL or heap allocated LARGE type. As a result,
180 // previously obtained pointers, references, or iterators to the underlying
181 // buffer will point to the original VIEW/OFFSET and not the new allocation.
182 char* mdata();
183 char* data(); // DEPRECATED: Use mdata().
184 char& operator[](size_t i);
185
186 // Assignment
187 tstring& assign(const char* str, size_t len);
188 tstring& assign(const char* str);
189
190 // View Assignment
191 tstring& assign_as_view(const tstring& str);
192 tstring& assign_as_view(const std::string& str);
193 tstring& assign_as_view(const StringPiece str);
194 tstring& assign_as_view(const char* str, size_t len);
195 tstring& assign_as_view(const char* str);
196
197 // Modifiers
198 // NOTE: Invalid input will result in undefined behavior.
199 tstring& append(const tstring& str);
200 tstring& append(const char* str, size_t len);
201 tstring& append(const char* str);
202 tstring& append(size_t n, char c);
203
204 tstring& erase(size_t pos, size_t len);
205
206 tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen);
207 tstring& insert(size_t pos, size_t n, char c);
208 void swap(tstring& str);
209 void push_back(char ch);
210
211 // Friends
212 friend bool operator==(const char* a, const tstring& b);
213 friend bool operator==(const std::string& a, const tstring& b);
214 friend tstring operator+(const tstring& a, const tstring& b);
215 friend std::ostream& operator<<(std::ostream& o, const tstring& str);
216 friend std::hash<tstring>;
217};
218
219// Non-member function overloads
220
221bool operator==(const char* a, const tstring& b);
222bool operator==(const std::string& a, const tstring& b);
223tstring operator+(const tstring& a, const tstring& b);
224std::ostream& operator<<(std::ostream& o, const tstring& str);
225
226// Implementations
227
228// Ctor
229
230inline tstring::tstring() { TF_TString_Init(&tstr_); }
231
232inline tstring::tstring(const char* str, size_t len) {
233 TF_TString_Init(&tstr_);
234 TF_TString_Copy(&tstr_, str, len);
235}
236
237inline tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {}
238
239inline tstring::tstring(size_t n, char c) {
240 TF_TString_Init(&tstr_);
241 TF_TString_Resize(&tstr_, n, c);
242}
243
244inline tstring::tstring(const std::string& str)
245 : tstring(str.data(), str.size()) {}
246
247inline tstring::tstring(const StringPiece str)
248 : tstring(str.data(), str.size()) {}
249
#ifdef PLATFORM_GOOGLE
// Constructs from a Cord: sizes the (uninitialized) buffer to cord.size() and
// lets the Cord write its bytes straight into it.
inline tstring::tstring(const absl::Cord& cord) {
  TF_TString_Init(&tstr_);
  TF_TString_ResizeUninitialized(&tstr_, cord.size());

  // mdata() is what the deprecated non-const data() forwards to.
  char* const dst = mdata();
  cord.CopyToArray(dst);
}
#endif  // PLATFORM_GOOGLE
258
259// Copy
260
261inline tstring::tstring(const tstring& str) {
262 TF_TString_Init(&tstr_);
263 TF_TString_Assign(&tstr_, &str.tstr_);
264}
265
266// Move
267
268inline tstring::tstring(tstring&& str) noexcept {
269 TF_TString_Init(&tstr_);
270 TF_TString_Move(&tstr_, &str.tstr_);
271}
272
273// Dtor
274
275inline tstring::~tstring() { TF_TString_Dealloc(&tstr_); }
276
277// Copy Assignment
278
279inline tstring& tstring::operator=(const tstring& str) {
280 TF_TString_Assign(&tstr_, &str.tstr_);
281
282 return *this;
283}
284
285inline tstring& tstring::operator=(const std::string& str) {
286 TF_TString_Copy(&tstr_, str.data(), str.size());
287 return *this;
288}
289
290inline tstring& tstring::operator=(const char* str) {
291 TF_TString_Copy(&tstr_, str, ::strlen(str));
292
293 return *this;
294}
295
296inline tstring& tstring::operator=(char c) {
297 resize_uninitialized(1);
298 (*this)[0] = c;
299
300 return *this;
301}
302
303inline tstring& tstring::operator=(const StringPiece str) {
304 TF_TString_Copy(&tstr_, str.data(), str.size());
305
306 return *this;
307}
308
#ifdef PLATFORM_GOOGLE
// Copy assignment from a Cord: resizes the buffer (uninitialized) to
// cord.size() and lets the Cord write its bytes into it. Returns *this.
inline tstring& tstring::operator=(const absl::Cord& cord) {
  TF_TString_ResizeUninitialized(&tstr_, cord.size());

  // mdata() is what the deprecated non-const data() forwards to.
  char* const dst = mdata();
  cord.CopyToArray(dst);

  return *this;
}
#endif  // PLATFORM_GOOGLE
318
319// View Assignment
320
321inline tstring& tstring::operator=(const tstring::view& tsv) {
322 assign_as_view(tsv.data(), tsv.size());
323
324 return *this;
325}
326
327// Move Assignment
328
329inline tstring& tstring::operator=(tstring&& str) {
330 TF_TString_Move(&tstr_, &str.tstr_);
331
332 return *this;
333}
334
335// Comparison
336
337inline int tstring::compare(const char* str, size_t len) const {
338 int ret = ::memcmp(data(), str, std::min(len, size()));
339
340 if (ret < 0) return -1;
341 if (ret > 0) return +1;
342
343 if (size() < len) return -1;
344 if (size() > len) return +1;
345
346 return 0;
347}
348
349inline bool tstring::operator<(const tstring& o) const {
350 return compare(o.data(), o.size()) < 0;
351}
352
353inline bool tstring::operator>(const tstring& o) const {
354 return compare(o.data(), o.size()) > 0;
355}
356
357inline bool tstring::operator==(const char* str) const {
358 return ::strlen(str) == size() && ::memcmp(data(), str, size()) == 0;
359}
360
361inline bool tstring::operator==(const tstring& o) const {
362 return o.size() == size() && ::memcmp(data(), o.data(), size()) == 0;
363}
364
365inline bool tstring::operator!=(const char* str) const {
366 return !(*this == str);
367}
368
369inline bool tstring::operator!=(const tstring& o) const {
370 return !(*this == o);
371}
372
373// Conversion Operators
374
375inline tstring::operator std::string() const {
376 return std::string(data(), size());
377}
378
379inline tstring::operator StringPiece() const {
380 return StringPiece(data(), size());
381}
382
#ifdef PLATFORM_GOOGLE
// Conversion enabled only for T == absl::AlphaNum: the string is first
// converted to a StringPiece, from which T is then constructed.
template <typename T, typename std::enable_if<
                          std::is_same<T, absl::AlphaNum>::value, T>::type*>
inline tstring::operator T() const {
  return T(static_cast<StringPiece>(*this));
}
#endif  // PLATFORM_GOOGLE
390
391// Attributes
392
393inline size_t tstring::size() const { return TF_TString_GetSize(&tstr_); }
394
395inline size_t tstring::length() const { return size(); }
396
397inline size_t tstring::capacity() const {
398 return TF_TString_GetCapacity(&tstr_);
399}
400
401inline bool tstring::empty() const { return size() == 0; }
402
403inline tstring::Type tstring::type() const {
404 return static_cast<tstring::Type>(TF_TString_GetType(&tstr_));
405}
406
407// Allocation
408
409inline void tstring::resize(size_t new_size, char c) {
410 TF_TString_Resize(&tstr_, new_size, c);
411}
412
413inline void tstring::resize_uninitialized(size_t new_size) {
414 TF_TString_ResizeUninitialized(&tstr_, new_size);
415}
416
417inline void tstring::clear() noexcept {
418 TF_TString_ResizeUninitialized(&tstr_, 0);
419}
420
421inline void tstring::reserve(size_t n) { TF_TString_Reserve(&tstr_, n); }
422
423// Iterators
424
425inline tstring::const_iterator tstring::begin() const { return &(*this)[0]; }
426inline tstring::const_iterator tstring::end() const { return &(*this)[size()]; }
427
428// Element Access
429
430inline const char* tstring::c_str() const { return data(); }
431
432inline const char* tstring::data() const {
433 return TF_TString_GetDataPointer(&tstr_);
434}
435
436inline const char& tstring::operator[](size_t i) const { return data()[i]; }
437
438inline const char& tstring::back() const { return (*this)[size() - 1]; }
439
440inline char* tstring::mdata() {
441 return TF_TString_GetMutableDataPointer(&tstr_);
442}
443
444inline char* tstring::data() {
445 // Deprecated
446 return mdata();
447}
448
449inline char& tstring::operator[](size_t i) { return mdata()[i]; }
450
451// Assignment
452
453inline tstring& tstring::assign(const char* str, size_t len) {
454 TF_TString_Copy(&tstr_, str, len);
455
456 return *this;
457}
458
459inline tstring& tstring::assign(const char* str) {
460 assign(str, ::strlen(str));
461
462 return *this;
463}
464
465// View Assignment
466
467inline tstring& tstring::assign_as_view(const tstring& str) {
468 assign_as_view(str.data(), str.size());
469
470 return *this;
471}
472
473inline tstring& tstring::assign_as_view(const std::string& str) {
474 assign_as_view(str.data(), str.size());
475
476 return *this;
477}
478
479inline tstring& tstring::assign_as_view(const StringPiece str) {
480 assign_as_view(str.data(), str.size());
481
482 return *this;
483}
484
485inline tstring& tstring::assign_as_view(const char* str, size_t len) {
486 TF_TString_AssignView(&tstr_, str, len);
487
488 return *this;
489}
490
491inline tstring& tstring::assign_as_view(const char* str) {
492 assign_as_view(str, ::strlen(str));
493
494 return *this;
495}
496
497// Modifiers
498
499inline tstring& tstring::append(const tstring& str) {
500 TF_TString_Append(&tstr_, &str.tstr_);
501
502 return *this;
503}
504
505inline tstring& tstring::append(const char* str, size_t len) {
506 TF_TString_AppendN(&tstr_, str, len);
507
508 return *this;
509}
510
511inline tstring& tstring::append(const char* str) {
512 append(str, ::strlen(str));
513
514 return *this;
515}
516
517inline tstring& tstring::append(size_t n, char c) {
518 // For append use cases, we want to ensure amortized growth.
519 const size_t new_size = size() + n;
520 TF_TString_ReserveAmortized(&tstr_, new_size);
521 resize(new_size, c);
522
523 return *this;
524}
525
526inline tstring& tstring::erase(size_t pos, size_t len) {
527 memmove(mdata() + pos, data() + pos + len, size() - len - pos);
528
529 resize(size() - len);
530
531 return *this;
532}
533
534inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos,
535 size_t sublen) {
536 size_t orig_size = size();
537 TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen);
538
539 memmove(mdata() + pos + sublen, data() + pos, orig_size - pos);
540 memmove(mdata() + pos, str.data() + subpos, sublen);
541
542 return *this;
543}
544
545inline tstring& tstring::insert(size_t pos, size_t n, char c) {
546 size_t size_ = size();
547 TF_TString_ResizeUninitialized(&tstr_, size_ + n);
548
549 memmove(mdata() + pos + n, data() + pos, size_ - pos);
550 memset(mdata() + pos, c, n);
551
552 return *this;
553}
554
555inline void tstring::swap(tstring& str) {
556 // TODO(dero): Invalid for OFFSET (unimplemented).
557 std::swap(tstr_, str.tstr_);
558}
559
560inline void tstring::push_back(char ch) { append(1, ch); }
561
562// Friends
563
564inline bool operator==(const char* a, const tstring& b) {
565 return ::strlen(a) == b.size() && ::memcmp(a, b.data(), b.size()) == 0;
566}
567
568inline bool operator==(const std::string& a, const tstring& b) {
569 return a.size() == b.size() && ::memcmp(a.data(), b.data(), b.size()) == 0;
570}
571
572inline tstring operator+(const tstring& a, const tstring& b) {
573 tstring r;
574 r.reserve(a.size() + b.size());
575 r.append(a);
576 r.append(b);
577
578 return r;
579}
580
581inline std::ostream& operator<<(std::ostream& o, const tstring& str) {
582 return o.write(str.data(), str.size());
583}
584
585} // namespace tsl
586
587#endif // TENSORFLOW_TSL_PLATFORM_TSTRING_H_
588