1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_TSL_PLATFORM_TSTRING_H_ |
17 | #define TENSORFLOW_TSL_PLATFORM_TSTRING_H_ |
18 | |
19 | #include <assert.h> |
20 | |
21 | #include <ostream> |
22 | #include <string> |
23 | |
24 | #include "tensorflow/tsl/platform/cord.h" |
25 | #include "tensorflow/tsl/platform/ctstring.h" |
26 | #include "tensorflow/tsl/platform/stringpiece.h" |
27 | |
28 | namespace tsl { |
29 | |
30 | // tensorflow::tstring is the scalar type for DT_STRING tensors. |
31 | // |
32 | // tstrings are meant to be used when interfacing with string tensors, and |
33 | // should not be considered as a general replacement for std::string in |
34 | // tensorflow. The primary purpose of tstring is to provide a unified and |
35 | // stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating |
36 | // unnecessary conversions across language boundaries, and allowing for compiler |
37 | // agnostic interoperability across dynamically loaded modules. |
38 | // |
// In addition to ABI stability, tstring features two string subtypes, VIEW and
// OFFSET.
41 | // |
42 | // VIEW tstrings are views into unowned character buffers; they can be used to |
43 | // pass around existing character strings without incurring a per object heap |
// allocation. Note that, like std::string_view, it is the user's
// responsibility to ensure that the underlying buffer of a VIEW tstring
// outlives the associated tstring object.
47 | // |
48 | // TODO(dero): Methods for creating OFFSET tensors are not currently |
49 | // implemented. |
50 | // |
51 | // OFFSET tstrings are platform independent offset defined strings which can be |
// directly mmapped or copied into a tensor buffer without the need for
53 | // deserialization or processing. For security reasons, it is imperative that |
54 | // OFFSET based string tensors are validated before use, or are from a trusted |
55 | // source. |
56 | // |
57 | // Underlying VIEW and OFFSET buffers are considered immutable, so l-value |
58 | // assignment, mutation, or non-const access to data() of tstrings will result |
59 | // in the conversion to an owned SMALL/LARGE type. |
60 | // |
61 | // The interface for tstring largely overlaps with std::string. Except where |
62 | // noted, expect equivalent semantics with synonymous std::string methods. |
63 | class tstring { |
64 | TF_TString tstr_; |
65 | |
66 | public: |
67 | enum Type { |
68 | // See cstring.h |
69 | SMALL = TF_TSTR_SMALL, |
70 | LARGE = TF_TSTR_LARGE, |
71 | OFFSET = TF_TSTR_OFFSET, |
72 | VIEW = TF_TSTR_VIEW, |
73 | }; |
74 | |
75 | // Assignment to a tstring object with a tstring::view type will create a VIEW |
76 | // type tstring. |
77 | class view { |
78 | const char* data_; |
79 | size_t size_; |
80 | |
81 | public: |
82 | explicit view(const char* data, size_t size) : data_(data), size_(size) {} |
83 | explicit view(const char* data) : data_(data), size_(::strlen(data)) {} |
84 | |
85 | const char* data() const { return data_; } |
86 | |
87 | size_t size() const { return size_; } |
88 | |
89 | view() = delete; |
90 | view(const view&) = delete; |
91 | view& operator=(const view&) = delete; |
92 | }; |
93 | |
94 | typedef const char* const_iterator; |
95 | |
96 | // Ctor |
97 | tstring(); |
98 | tstring(const std::string& str); // NOLINT TODO(b/147740521): Make explicit. |
99 | tstring(const char* str, size_t len); |
100 | tstring(const char* str); // NOLINT TODO(b/147740521): Make explicit. |
101 | tstring(size_t n, char c); |
102 | explicit tstring(const StringPiece str); |
103 | #ifdef PLATFORM_GOOGLE |
104 | explicit tstring(const absl::Cord& cord); |
105 | #endif // PLATFORM_GOOGLE |
106 | |
107 | // Copy |
108 | tstring(const tstring& str); |
109 | |
110 | // Move |
111 | tstring(tstring&& str) noexcept; |
112 | |
113 | // Dtor |
114 | ~tstring(); |
115 | |
116 | // Copy Assignment |
117 | tstring& operator=(const tstring& str); |
118 | tstring& operator=(const std::string& str); |
119 | tstring& operator=(const char* str); |
120 | tstring& operator=(char ch); |
121 | tstring& operator=(const StringPiece str); |
122 | #ifdef PLATFORM_GOOGLE |
123 | tstring& operator=(const absl::Cord& cord); |
124 | #endif // PLATFORM_GOOGLE |
125 | |
126 | // View Assignment |
127 | tstring& operator=(const view& tsv); |
128 | |
129 | // Move Assignment |
130 | tstring& operator=(tstring&& str); |
131 | |
132 | // Comparison |
133 | int compare(const char* str, size_t len) const; |
134 | bool operator<(const tstring& o) const; |
135 | bool operator>(const tstring& o) const; |
136 | bool operator==(const char* str) const; |
137 | bool operator==(const tstring& o) const; |
138 | bool operator!=(const char* str) const; |
139 | bool operator!=(const tstring& o) const; |
140 | |
141 | // Conversion Operators |
142 | // TODO(b/147740521): Make explicit. |
143 | operator std::string() const; // NOLINT |
144 | // TODO(b/147740521): Make explicit. |
145 | operator StringPiece() const; // NOLINT |
146 | #ifdef PLATFORM_GOOGLE |
147 | template <typename T, |
148 | typename std::enable_if<std::is_same<T, absl::AlphaNum>::value, |
149 | T>::type* = nullptr> |
150 | operator T() const; // NOLINT TODO(b/147740521): Remove. |
151 | #endif // PLATFORM_GOOGLE |
152 | |
153 | // Attributes |
154 | size_t size() const; |
155 | size_t length() const; |
156 | size_t capacity() const; |
157 | bool empty() const; |
158 | Type type() const; |
159 | |
160 | // Allocation |
161 | void resize(size_t new_size, char c = 0); |
162 | // Similar to resize, but will leave the newly grown region uninitialized. |
163 | void resize_uninitialized(size_t new_size); |
164 | void clear() noexcept; |
165 | void reserve(size_t n); |
166 | |
167 | // Iterators |
168 | const_iterator begin() const; |
169 | const_iterator end() const; |
170 | |
171 | // Const Element Access |
172 | const char* c_str() const; |
173 | const char* data() const; |
174 | const char& operator[](size_t i) const; |
175 | const char& back() const; |
176 | |
177 | // Mutable Element Access |
178 | // NOTE: For VIEW/OFFSET types, calling these methods will result in the |
179 | // conversion to a SMALL or heap allocated LARGE type. As a result, |
180 | // previously obtained pointers, references, or iterators to the underlying |
181 | // buffer will point to the original VIEW/OFFSET and not the new allocation. |
182 | char* mdata(); |
183 | char* data(); // DEPRECATED: Use mdata(). |
184 | char& operator[](size_t i); |
185 | |
186 | // Assignment |
187 | tstring& assign(const char* str, size_t len); |
188 | tstring& assign(const char* str); |
189 | |
190 | // View Assignment |
191 | tstring& assign_as_view(const tstring& str); |
192 | tstring& assign_as_view(const std::string& str); |
193 | tstring& assign_as_view(const StringPiece str); |
194 | tstring& assign_as_view(const char* str, size_t len); |
195 | tstring& assign_as_view(const char* str); |
196 | |
197 | // Modifiers |
198 | // NOTE: Invalid input will result in undefined behavior. |
199 | tstring& append(const tstring& str); |
200 | tstring& append(const char* str, size_t len); |
201 | tstring& append(const char* str); |
202 | tstring& append(size_t n, char c); |
203 | |
204 | tstring& erase(size_t pos, size_t len); |
205 | |
206 | tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen); |
207 | tstring& insert(size_t pos, size_t n, char c); |
208 | void swap(tstring& str); |
209 | void push_back(char ch); |
210 | |
211 | // Friends |
212 | friend bool operator==(const char* a, const tstring& b); |
213 | friend bool operator==(const std::string& a, const tstring& b); |
214 | friend tstring operator+(const tstring& a, const tstring& b); |
215 | friend std::ostream& operator<<(std::ostream& o, const tstring& str); |
216 | friend std::hash<tstring>; |
217 | }; |
218 | |
219 | // Non-member function overloads |
220 | |
221 | bool operator==(const char* a, const tstring& b); |
222 | bool operator==(const std::string& a, const tstring& b); |
223 | tstring operator+(const tstring& a, const tstring& b); |
224 | std::ostream& operator<<(std::ostream& o, const tstring& str); |
225 | |
226 | // Implementations |
227 | |
228 | // Ctor |
229 | |
230 | inline tstring::tstring() { TF_TString_Init(&tstr_); } |
231 | |
232 | inline tstring::tstring(const char* str, size_t len) { |
233 | TF_TString_Init(&tstr_); |
234 | TF_TString_Copy(&tstr_, str, len); |
235 | } |
236 | |
237 | inline tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {} |
238 | |
239 | inline tstring::tstring(size_t n, char c) { |
240 | TF_TString_Init(&tstr_); |
241 | TF_TString_Resize(&tstr_, n, c); |
242 | } |
243 | |
244 | inline tstring::tstring(const std::string& str) |
245 | : tstring(str.data(), str.size()) {} |
246 | |
247 | inline tstring::tstring(const StringPiece str) |
248 | : tstring(str.data(), str.size()) {} |
249 | |
#ifdef PLATFORM_GOOGLE
// Constructs from an absl::Cord: the string is sized to cord.size() without
// initializing the bytes, then the cord writes its contents into the buffer.
inline tstring::tstring(const absl::Cord& cord) {
  TF_TString_Init(&tstr_);
  TF_TString_ResizeUninitialized(&tstr_, cord.size());

  // mdata() is the supported mutable accessor; the non-const data() overload
  // is deprecated and merely forwards to it.
  cord.CopyToArray(mdata());
}
#endif  // PLATFORM_GOOGLE
258 | |
259 | // Copy |
260 | |
261 | inline tstring::tstring(const tstring& str) { |
262 | TF_TString_Init(&tstr_); |
263 | TF_TString_Assign(&tstr_, &str.tstr_); |
264 | } |
265 | |
266 | // Move |
267 | |
268 | inline tstring::tstring(tstring&& str) noexcept { |
269 | TF_TString_Init(&tstr_); |
270 | TF_TString_Move(&tstr_, &str.tstr_); |
271 | } |
272 | |
273 | // Dtor |
274 | |
275 | inline tstring::~tstring() { TF_TString_Dealloc(&tstr_); } |
276 | |
277 | // Copy Assignment |
278 | |
279 | inline tstring& tstring::operator=(const tstring& str) { |
280 | TF_TString_Assign(&tstr_, &str.tstr_); |
281 | |
282 | return *this; |
283 | } |
284 | |
285 | inline tstring& tstring::operator=(const std::string& str) { |
286 | TF_TString_Copy(&tstr_, str.data(), str.size()); |
287 | return *this; |
288 | } |
289 | |
290 | inline tstring& tstring::operator=(const char* str) { |
291 | TF_TString_Copy(&tstr_, str, ::strlen(str)); |
292 | |
293 | return *this; |
294 | } |
295 | |
296 | inline tstring& tstring::operator=(char c) { |
297 | resize_uninitialized(1); |
298 | (*this)[0] = c; |
299 | |
300 | return *this; |
301 | } |
302 | |
303 | inline tstring& tstring::operator=(const StringPiece str) { |
304 | TF_TString_Copy(&tstr_, str.data(), str.size()); |
305 | |
306 | return *this; |
307 | } |
308 | |
#ifdef PLATFORM_GOOGLE
// Assignment from an absl::Cord: the string is resized to cord.size() without
// initializing the bytes, then the cord writes its contents into the buffer.
// Returns *this.
inline tstring& tstring::operator=(const absl::Cord& cord) {
  TF_TString_ResizeUninitialized(&tstr_, cord.size());

  // mdata() is the supported mutable accessor; the non-const data() overload
  // is deprecated and merely forwards to it.
  cord.CopyToArray(mdata());

  return *this;
}
#endif  // PLATFORM_GOOGLE
318 | |
319 | // View Assignment |
320 | |
321 | inline tstring& tstring::operator=(const tstring::view& tsv) { |
322 | assign_as_view(tsv.data(), tsv.size()); |
323 | |
324 | return *this; |
325 | } |
326 | |
327 | // Move Assignment |
328 | |
329 | inline tstring& tstring::operator=(tstring&& str) { |
330 | TF_TString_Move(&tstr_, &str.tstr_); |
331 | |
332 | return *this; |
333 | } |
334 | |
335 | // Comparison |
336 | |
337 | inline int tstring::compare(const char* str, size_t len) const { |
338 | int ret = ::memcmp(data(), str, std::min(len, size())); |
339 | |
340 | if (ret < 0) return -1; |
341 | if (ret > 0) return +1; |
342 | |
343 | if (size() < len) return -1; |
344 | if (size() > len) return +1; |
345 | |
346 | return 0; |
347 | } |
348 | |
349 | inline bool tstring::operator<(const tstring& o) const { |
350 | return compare(o.data(), o.size()) < 0; |
351 | } |
352 | |
353 | inline bool tstring::operator>(const tstring& o) const { |
354 | return compare(o.data(), o.size()) > 0; |
355 | } |
356 | |
357 | inline bool tstring::operator==(const char* str) const { |
358 | return ::strlen(str) == size() && ::memcmp(data(), str, size()) == 0; |
359 | } |
360 | |
361 | inline bool tstring::operator==(const tstring& o) const { |
362 | return o.size() == size() && ::memcmp(data(), o.data(), size()) == 0; |
363 | } |
364 | |
365 | inline bool tstring::operator!=(const char* str) const { |
366 | return !(*this == str); |
367 | } |
368 | |
369 | inline bool tstring::operator!=(const tstring& o) const { |
370 | return !(*this == o); |
371 | } |
372 | |
373 | // Conversion Operators |
374 | |
375 | inline tstring::operator std::string() const { |
376 | return std::string(data(), size()); |
377 | } |
378 | |
379 | inline tstring::operator StringPiece() const { |
380 | return StringPiece(data(), size()); |
381 | } |
382 | |
#ifdef PLATFORM_GOOGLE
// Conversion enabled only for T == absl::AlphaNum: the tstring is first
// converted to a StringPiece, from which T is constructed.
template <typename T, typename std::enable_if<
                          std::is_same<T, absl::AlphaNum>::value, T>::type*>
inline tstring::operator T() const {
  const StringPiece piece(*this);
  return T(piece);
}
#endif  // PLATFORM_GOOGLE
390 | |
391 | // Attributes |
392 | |
393 | inline size_t tstring::size() const { return TF_TString_GetSize(&tstr_); } |
394 | |
395 | inline size_t tstring::length() const { return size(); } |
396 | |
397 | inline size_t tstring::capacity() const { |
398 | return TF_TString_GetCapacity(&tstr_); |
399 | } |
400 | |
401 | inline bool tstring::empty() const { return size() == 0; } |
402 | |
403 | inline tstring::Type tstring::type() const { |
404 | return static_cast<tstring::Type>(TF_TString_GetType(&tstr_)); |
405 | } |
406 | |
407 | // Allocation |
408 | |
409 | inline void tstring::resize(size_t new_size, char c) { |
410 | TF_TString_Resize(&tstr_, new_size, c); |
411 | } |
412 | |
413 | inline void tstring::resize_uninitialized(size_t new_size) { |
414 | TF_TString_ResizeUninitialized(&tstr_, new_size); |
415 | } |
416 | |
417 | inline void tstring::clear() noexcept { |
418 | TF_TString_ResizeUninitialized(&tstr_, 0); |
419 | } |
420 | |
421 | inline void tstring::reserve(size_t n) { TF_TString_Reserve(&tstr_, n); } |
422 | |
423 | // Iterators |
424 | |
425 | inline tstring::const_iterator tstring::begin() const { return &(*this)[0]; } |
426 | inline tstring::const_iterator tstring::end() const { return &(*this)[size()]; } |
427 | |
428 | // Element Access |
429 | |
430 | inline const char* tstring::c_str() const { return data(); } |
431 | |
432 | inline const char* tstring::data() const { |
433 | return TF_TString_GetDataPointer(&tstr_); |
434 | } |
435 | |
436 | inline const char& tstring::operator[](size_t i) const { return data()[i]; } |
437 | |
438 | inline const char& tstring::back() const { return (*this)[size() - 1]; } |
439 | |
440 | inline char* tstring::mdata() { |
441 | return TF_TString_GetMutableDataPointer(&tstr_); |
442 | } |
443 | |
444 | inline char* tstring::data() { |
445 | // Deprecated |
446 | return mdata(); |
447 | } |
448 | |
449 | inline char& tstring::operator[](size_t i) { return mdata()[i]; } |
450 | |
451 | // Assignment |
452 | |
453 | inline tstring& tstring::assign(const char* str, size_t len) { |
454 | TF_TString_Copy(&tstr_, str, len); |
455 | |
456 | return *this; |
457 | } |
458 | |
459 | inline tstring& tstring::assign(const char* str) { |
460 | assign(str, ::strlen(str)); |
461 | |
462 | return *this; |
463 | } |
464 | |
465 | // View Assignment |
466 | |
467 | inline tstring& tstring::assign_as_view(const tstring& str) { |
468 | assign_as_view(str.data(), str.size()); |
469 | |
470 | return *this; |
471 | } |
472 | |
473 | inline tstring& tstring::assign_as_view(const std::string& str) { |
474 | assign_as_view(str.data(), str.size()); |
475 | |
476 | return *this; |
477 | } |
478 | |
479 | inline tstring& tstring::assign_as_view(const StringPiece str) { |
480 | assign_as_view(str.data(), str.size()); |
481 | |
482 | return *this; |
483 | } |
484 | |
485 | inline tstring& tstring::assign_as_view(const char* str, size_t len) { |
486 | TF_TString_AssignView(&tstr_, str, len); |
487 | |
488 | return *this; |
489 | } |
490 | |
491 | inline tstring& tstring::assign_as_view(const char* str) { |
492 | assign_as_view(str, ::strlen(str)); |
493 | |
494 | return *this; |
495 | } |
496 | |
497 | // Modifiers |
498 | |
499 | inline tstring& tstring::append(const tstring& str) { |
500 | TF_TString_Append(&tstr_, &str.tstr_); |
501 | |
502 | return *this; |
503 | } |
504 | |
505 | inline tstring& tstring::append(const char* str, size_t len) { |
506 | TF_TString_AppendN(&tstr_, str, len); |
507 | |
508 | return *this; |
509 | } |
510 | |
511 | inline tstring& tstring::append(const char* str) { |
512 | append(str, ::strlen(str)); |
513 | |
514 | return *this; |
515 | } |
516 | |
517 | inline tstring& tstring::append(size_t n, char c) { |
518 | // For append use cases, we want to ensure amortized growth. |
519 | const size_t new_size = size() + n; |
520 | TF_TString_ReserveAmortized(&tstr_, new_size); |
521 | resize(new_size, c); |
522 | |
523 | return *this; |
524 | } |
525 | |
526 | inline tstring& tstring::erase(size_t pos, size_t len) { |
527 | memmove(mdata() + pos, data() + pos + len, size() - len - pos); |
528 | |
529 | resize(size() - len); |
530 | |
531 | return *this; |
532 | } |
533 | |
534 | inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos, |
535 | size_t sublen) { |
536 | size_t orig_size = size(); |
537 | TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen); |
538 | |
539 | memmove(mdata() + pos + sublen, data() + pos, orig_size - pos); |
540 | memmove(mdata() + pos, str.data() + subpos, sublen); |
541 | |
542 | return *this; |
543 | } |
544 | |
545 | inline tstring& tstring::insert(size_t pos, size_t n, char c) { |
546 | size_t size_ = size(); |
547 | TF_TString_ResizeUninitialized(&tstr_, size_ + n); |
548 | |
549 | memmove(mdata() + pos + n, data() + pos, size_ - pos); |
550 | memset(mdata() + pos, c, n); |
551 | |
552 | return *this; |
553 | } |
554 | |
555 | inline void tstring::swap(tstring& str) { |
556 | // TODO(dero): Invalid for OFFSET (unimplemented). |
557 | std::swap(tstr_, str.tstr_); |
558 | } |
559 | |
560 | inline void tstring::push_back(char ch) { append(1, ch); } |
561 | |
562 | // Friends |
563 | |
564 | inline bool operator==(const char* a, const tstring& b) { |
565 | return ::strlen(a) == b.size() && ::memcmp(a, b.data(), b.size()) == 0; |
566 | } |
567 | |
568 | inline bool operator==(const std::string& a, const tstring& b) { |
569 | return a.size() == b.size() && ::memcmp(a.data(), b.data(), b.size()) == 0; |
570 | } |
571 | |
572 | inline tstring operator+(const tstring& a, const tstring& b) { |
573 | tstring r; |
574 | r.reserve(a.size() + b.size()); |
575 | r.append(a); |
576 | r.append(b); |
577 | |
578 | return r; |
579 | } |
580 | |
581 | inline std::ostream& operator<<(std::ostream& o, const tstring& str) { |
582 | return o.write(str.data(), str.size()); |
583 | } |
584 | |
585 | } // namespace tsl |
586 | |
587 | #endif // TENSORFLOW_TSL_PLATFORM_TSTRING_H_ |
588 | |