1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_TSL_PLATFORM_FILE_SYSTEM_H_ |
17 | #define TENSORFLOW_TSL_PLATFORM_FILE_SYSTEM_H_ |
18 | |
#include <stdint.h>

#include <functional>
#include <initializer_list>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
26 | |
27 | #include "tensorflow/tsl/platform/cord.h" |
28 | #include "tensorflow/tsl/platform/errors.h" |
29 | #include "tensorflow/tsl/platform/file_statistics.h" |
30 | #include "tensorflow/tsl/platform/macros.h" |
31 | #include "tensorflow/tsl/platform/platform.h" |
32 | #include "tensorflow/tsl/platform/stringpiece.h" |
33 | #include "tensorflow/tsl/platform/types.h" |
34 | |
// DeleteFile, CopyFile, and TranslateName are used as FileSystem method names
// below. On Windows builds, any macros with those names are removed first so
// they cannot rewrite the declarations.
// NOTE(review): presumably these macros come from the Windows SDK headers —
// confirm.
#ifdef PLATFORM_WINDOWS
#undef DeleteFile
#undef CopyFile
#undef TranslateName
#endif
40 | |
41 | namespace tsl { |
42 | |
43 | class RandomAccessFile; |
44 | class ReadOnlyMemoryRegion; |
45 | class WritableFile; |
46 | |
class FileSystem;
/// Handle passed to the token-taking FileSystem methods to identify a
/// transaction.
///
/// NOTE(review): this header does not spell out the meaning of the fields; the
/// per-field remarks below are inferred from the names — confirm against the
/// file system implementations that create tokens.
struct TransactionToken {
  FileSystem* owner;  // Presumably the file system that issued this token.
  void* token;        // Opaque pointer; presumably implementation-defined.
};
52 | |
53 | /// A generic interface for accessing a file system. Implementations |
54 | /// of custom filesystem adapters must implement this interface, |
55 | /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes. |
56 | class FileSystem { |
57 | public: |
58 | /// \brief Creates a brand new random access read-only file with the |
59 | /// specified name. |
60 | /// |
61 | /// On success, stores a pointer to the new file in |
62 | /// *result and returns OK. On failure stores NULL in *result and |
63 | /// returns non-OK. If the file does not exist, returns a non-OK |
64 | /// status. |
65 | /// |
66 | /// The returned file may be concurrently accessed by multiple threads. |
67 | /// |
68 | /// The ownership of the returned RandomAccessFile is passed to the caller |
69 | /// and the object should be deleted when is not used. |
70 | virtual tsl::Status NewRandomAccessFile( |
71 | const std::string& fname, std::unique_ptr<RandomAccessFile>* result) { |
72 | return NewRandomAccessFile(fname, nullptr, result); |
73 | } |
74 | |
75 | virtual tsl::Status NewRandomAccessFile( |
76 | const std::string& fname, TransactionToken* token, |
77 | std::unique_ptr<RandomAccessFile>* result) { |
78 | // We duplicate these methods due to Google internal coding style prevents |
79 | // virtual functions with default arguments. See PR #41615. |
80 | return OkStatus(); |
81 | } |
82 | |
83 | /// \brief Creates an object that writes to a new file with the specified |
84 | /// name. |
85 | /// |
86 | /// Deletes any existing file with the same name and creates a |
87 | /// new file. On success, stores a pointer to the new file in |
88 | /// *result and returns OK. On failure stores NULL in *result and |
89 | /// returns non-OK. |
90 | /// |
91 | /// The returned file will only be accessed by one thread at a time. |
92 | /// |
93 | /// The ownership of the returned WritableFile is passed to the caller |
94 | /// and the object should be deleted when is not used. |
95 | virtual tsl::Status NewWritableFile(const std::string& fname, |
96 | std::unique_ptr<WritableFile>* result) { |
97 | return NewWritableFile(fname, nullptr, result); |
98 | } |
99 | |
100 | virtual tsl::Status NewWritableFile(const std::string& fname, |
101 | TransactionToken* token, |
102 | std::unique_ptr<WritableFile>* result) { |
103 | return OkStatus(); |
104 | } |
105 | |
106 | /// \brief Creates an object that either appends to an existing file, or |
107 | /// writes to a new file (if the file does not exist to begin with). |
108 | /// |
109 | /// On success, stores a pointer to the new file in *result and |
110 | /// returns OK. On failure stores NULL in *result and returns |
111 | /// non-OK. |
112 | /// |
113 | /// The returned file will only be accessed by one thread at a time. |
114 | /// |
115 | /// The ownership of the returned WritableFile is passed to the caller |
116 | /// and the object should be deleted when is not used. |
117 | virtual tsl::Status NewAppendableFile(const std::string& fname, |
118 | std::unique_ptr<WritableFile>* result) { |
119 | return NewAppendableFile(fname, nullptr, result); |
120 | } |
121 | |
122 | virtual tsl::Status NewAppendableFile(const std::string& fname, |
123 | TransactionToken* token, |
124 | std::unique_ptr<WritableFile>* result) { |
125 | return OkStatus(); |
126 | } |
127 | |
128 | /// \brief Creates a readonly region of memory with the file context. |
129 | /// |
130 | /// On success, it returns a pointer to read-only memory region |
131 | /// from the content of file fname. The ownership of the region is passed to |
132 | /// the caller. On failure stores nullptr in *result and returns non-OK. |
133 | /// |
134 | /// The returned memory region can be accessed from many threads in parallel. |
135 | /// |
136 | /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller |
137 | /// and the object should be deleted when is not used. |
138 | virtual tsl::Status NewReadOnlyMemoryRegionFromFile( |
139 | const std::string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) { |
140 | return NewReadOnlyMemoryRegionFromFile(fname, nullptr, result); |
141 | } |
142 | |
143 | virtual tsl::Status NewReadOnlyMemoryRegionFromFile( |
144 | const std::string& fname, TransactionToken* token, |
145 | std::unique_ptr<ReadOnlyMemoryRegion>* result) { |
146 | return OkStatus(); |
147 | } |
148 | |
149 | /// Returns OK if the named path exists and NOT_FOUND otherwise. |
150 | virtual tsl::Status FileExists(const std::string& fname) { |
151 | return FileExists(fname, nullptr); |
152 | } |
153 | |
154 | virtual tsl::Status FileExists(const std::string& fname, |
155 | TransactionToken* token) { |
156 | return OkStatus(); |
157 | } |
158 | |
159 | /// Returns true if all the listed files exist, false otherwise. |
160 | /// if status is not null, populate the vector with a detailed status |
161 | /// for each file. |
162 | virtual bool FilesExist(const std::vector<string>& files, |
163 | std::vector<Status>* status) { |
164 | return FilesExist(files, nullptr, status); |
165 | } |
166 | |
167 | virtual bool FilesExist(const std::vector<string>& files, |
168 | TransactionToken* token, std::vector<Status>* status); |
169 | |
170 | /// \brief Returns the immediate children in the given directory. |
171 | /// |
172 | /// The returned paths are relative to 'dir'. |
173 | virtual tsl::Status GetChildren(const std::string& dir, |
174 | std::vector<string>* result) { |
175 | return GetChildren(dir, nullptr, result); |
176 | } |
177 | |
178 | virtual tsl::Status GetChildren(const std::string& dir, |
179 | TransactionToken* token, |
180 | std::vector<string>* result) { |
181 | return OkStatus(); |
182 | } |
183 | |
184 | /// \brief Given a pattern, stores in *results the set of paths that matches |
185 | /// that pattern. *results is cleared. |
186 | /// |
187 | /// pattern must match all of a name, not just a substring. |
188 | /// |
189 | /// pattern: { term } |
190 | /// term: |
191 | /// '*': matches any sequence of non-'/' characters |
192 | /// '?': matches a single non-'/' character |
193 | /// '[' [ '^' ] { match-list } ']': |
194 | /// matches any single character (not) on the list |
195 | /// c: matches character c (c != '*', '?', '\\', '[') |
196 | /// '\\' c: matches character c |
197 | /// character-range: |
198 | /// c: matches character c (c != '\\', '-', ']') |
199 | /// '\\' c: matches character c |
200 | /// lo '-' hi: matches character c for lo <= c <= hi |
201 | /// |
202 | /// Typical return codes: |
203 | /// * OK - no errors |
204 | /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not |
205 | /// implemented |
206 | virtual tsl::Status GetMatchingPaths(const std::string& pattern, |
207 | std::vector<string>* results) { |
208 | return GetMatchingPaths(pattern, nullptr, results); |
209 | } |
210 | |
211 | virtual tsl::Status GetMatchingPaths(const std::string& pattern, |
212 | TransactionToken* token, |
213 | std::vector<string>* results) { |
214 | return OkStatus(); |
215 | } |
216 | |
217 | /// \brief Checks if the given filename matches the pattern. |
218 | /// |
219 | /// This function provides the equivalent of posix fnmatch, however it is |
220 | /// implemented without fnmatch to ensure that this can be used for cloud |
221 | /// filesystems on windows. For windows filesystems, it uses PathMatchSpec. |
222 | virtual bool Match(const std::string& filename, const std::string& pattern); |
223 | |
224 | /// \brief Obtains statistics for the given path. |
225 | virtual tsl::Status Stat(const std::string& fname, FileStatistics* stat) { |
226 | return Stat(fname, nullptr, stat); |
227 | } |
228 | |
229 | virtual tsl::Status Stat(const std::string& fname, TransactionToken* token, |
230 | FileStatistics* stat) { |
231 | return OkStatus(); |
232 | } |
233 | |
234 | /// \brief Deletes the named file. |
235 | virtual tsl::Status DeleteFile(const std::string& fname) { |
236 | return DeleteFile(fname, nullptr); |
237 | } |
238 | |
239 | virtual tsl::Status DeleteFile(const std::string& fname, |
240 | TransactionToken* token) { |
241 | return OkStatus(); |
242 | } |
243 | |
244 | /// \brief Creates the specified directory. |
245 | /// Typical return codes: |
246 | /// * OK - successfully created the directory. |
247 | /// * ALREADY_EXISTS - directory with name dirname already exists. |
248 | /// * PERMISSION_DENIED - dirname is not writable. |
249 | virtual tsl::Status CreateDir(const std::string& dirname) { |
250 | return CreateDir(dirname, nullptr); |
251 | } |
252 | |
253 | virtual tsl::Status CreateDir(const std::string& dirname, |
254 | TransactionToken* token) { |
255 | return OkStatus(); |
256 | } |
257 | |
258 | /// \brief Creates the specified directory and all the necessary |
259 | /// subdirectories. |
260 | /// Typical return codes: |
261 | /// * OK - successfully created the directory and sub directories, even if |
262 | /// they were already created. |
263 | /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. |
264 | virtual tsl::Status RecursivelyCreateDir(const std::string& dirname) { |
265 | return RecursivelyCreateDir(dirname, nullptr); |
266 | } |
267 | |
268 | virtual tsl::Status RecursivelyCreateDir(const std::string& dirname, |
269 | TransactionToken* token); |
270 | |
271 | /// \brief Deletes the specified directory. |
272 | virtual tsl::Status DeleteDir(const std::string& dirname) { |
273 | return DeleteDir(dirname, nullptr); |
274 | } |
275 | |
276 | virtual tsl::Status DeleteDir(const std::string& dirname, |
277 | TransactionToken* token) { |
278 | return OkStatus(); |
279 | } |
280 | |
281 | /// \brief Deletes the specified directory and all subdirectories and files |
282 | /// underneath it. This is accomplished by traversing the directory tree |
283 | /// rooted at dirname and deleting entries as they are encountered. |
284 | /// |
285 | /// If dirname itself is not readable or does not exist, *undeleted_dir_count |
286 | /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status |
287 | /// (e.g. NOT_FOUND) is returned. |
288 | /// |
289 | /// If dirname and all its descendants were successfully deleted, TF_OK is |
290 | /// returned and both error counters are set to zero. |
291 | /// |
292 | /// Otherwise, while traversing the tree, undeleted_file_count and |
293 | /// undeleted_dir_count are updated if an entry of the corresponding type |
294 | /// could not be deleted. The returned error status represents the reason that |
295 | /// any one of these entries could not be deleted. |
296 | /// |
297 | /// REQUIRES: undeleted_files, undeleted_dirs to be not null. |
298 | /// |
299 | /// Typical return codes: |
300 | /// * OK - dirname exists and we were able to delete everything underneath. |
301 | /// * NOT_FOUND - dirname doesn't exist |
302 | /// * PERMISSION_DENIED - dirname or some descendant is not writable |
303 | /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not |
304 | /// implemented |
305 | virtual tsl::Status DeleteRecursively(const std::string& dirname, |
306 | int64_t* undeleted_files, |
307 | int64_t* undeleted_dirs) { |
308 | return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs); |
309 | } |
310 | |
311 | virtual tsl::Status DeleteRecursively(const std::string& dirname, |
312 | TransactionToken* token, |
313 | int64_t* undeleted_files, |
314 | int64_t* undeleted_dirs); |
315 | |
316 | /// \brief Stores the size of `fname` in `*file_size`. |
317 | virtual tsl::Status GetFileSize(const std::string& fname, uint64* file_size) { |
318 | return GetFileSize(fname, nullptr, file_size); |
319 | } |
320 | |
321 | virtual tsl::Status GetFileSize(const std::string& fname, |
322 | TransactionToken* token, uint64* file_size) { |
323 | return OkStatus(); |
324 | } |
325 | |
326 | /// \brief Overwrites the target if it exists. |
327 | virtual tsl::Status RenameFile(const std::string& src, |
328 | const std::string& target) { |
329 | return RenameFile(src, target, nullptr); |
330 | } |
331 | |
332 | virtual tsl::Status RenameFile(const std::string& src, |
333 | const std::string& target, |
334 | TransactionToken* token) { |
335 | return OkStatus(); |
336 | } |
337 | |
338 | /// \brief Copy the src to target. |
339 | virtual tsl::Status CopyFile(const std::string& src, |
340 | const std::string& target) { |
341 | return CopyFile(src, target, nullptr); |
342 | } |
343 | |
344 | virtual tsl::Status CopyFile(const std::string& src, |
345 | const std::string& target, |
346 | TransactionToken* token); |
347 | |
348 | /// \brief Translate an URI to a filename for the FileSystem implementation. |
349 | /// |
350 | /// The implementation in this class cleans up the path, removing |
351 | /// duplicate /'s, resolving .. and removing trailing '/'. |
352 | /// This respects relative vs. absolute paths, but does not |
353 | /// invoke any system calls (getcwd(2)) in order to resolve relative |
354 | /// paths with respect to the actual working directory. That is, this is |
355 | /// purely string manipulation, completely independent of process state. |
356 | virtual std::string TranslateName(const std::string& name) const; |
357 | |
358 | /// \brief Returns whether the given path is a directory or not. |
359 | /// |
360 | /// Typical return codes (not guaranteed exhaustive): |
361 | /// * OK - The path exists and is a directory. |
362 | /// * FAILED_PRECONDITION - The path exists and is not a directory. |
363 | /// * NOT_FOUND - The path entry does not exist. |
364 | /// * PERMISSION_DENIED - Insufficient permissions. |
365 | /// * UNIMPLEMENTED - The file factory doesn't support directories. |
366 | virtual tsl::Status IsDirectory(const std::string& fname) { |
367 | return IsDirectory(fname, nullptr); |
368 | } |
369 | |
370 | virtual tsl::Status IsDirectory(const std::string& fname, |
371 | TransactionToken* token); |
372 | |
373 | /// \brief Returns whether the given path is on a file system |
374 | /// that has atomic move capabilities. This can be used |
375 | /// to determine if there needs to be a temp location to safely write objects. |
376 | /// The second boolean argument has_atomic_move contains this information. |
377 | /// |
378 | /// Returns one of the following status codes (not guaranteed exhaustive): |
379 | /// * OK - The path is on a recognized file system, |
380 | /// so has_atomic_move holds the above information. |
381 | /// * UNIMPLEMENTED - The file system of the path hasn't been implemented in |
382 | /// TF |
383 | virtual Status HasAtomicMove(const std::string& path, bool* has_atomic_move); |
384 | |
385 | /// \brief Flushes any cached filesystem objects from memory. |
386 | virtual void FlushCaches() { FlushCaches(nullptr); } |
387 | |
388 | virtual void FlushCaches(TransactionToken* token); |
389 | |
390 | /// \brief The separator this filesystem uses. |
391 | /// |
392 | /// This is implemented as a part of the filesystem, because even on windows, |
393 | /// a user may need access to filesystems with '/' separators, such as cloud |
394 | /// filesystems. |
395 | virtual char Separator() const; |
396 | |
397 | /// \brief Split a path to its basename and dirname. |
398 | /// |
399 | /// Helper function for Basename and Dirname. |
400 | std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) const; |
401 | |
402 | /// \brief returns the final file name in the given path. |
403 | /// |
404 | /// Returns the part of the path after the final "/". If there is no |
405 | /// "/" in the path, the result is the same as the input. |
406 | virtual StringPiece Basename(StringPiece path) const; |
407 | |
408 | /// \brief Returns the part of the path before the final "/". |
409 | /// |
410 | /// If there is a single leading "/" in the path, the result will be the |
411 | /// leading "/". If there is no "/" in the path, the result is the empty |
412 | /// prefix of the input. |
413 | StringPiece Dirname(StringPiece path) const; |
414 | |
415 | /// \brief Returns the part of the basename of path after the final ".". |
416 | /// |
417 | /// If there is no "." in the basename, the result is empty. |
418 | StringPiece Extension(StringPiece path) const; |
419 | |
420 | /// \brief Clean duplicate and trailing, "/"s, and resolve ".." and ".". |
421 | /// |
422 | /// NOTE: This respects relative vs. absolute paths, but does not |
423 | /// invoke any system calls (getcwd(2)) in order to resolve relative |
424 | /// paths with respect to the actual working directory. That is, this is |
425 | /// purely string manipulation, completely independent of process state. |
426 | std::string CleanPath(StringPiece path) const; |
427 | |
428 | /// \brief Creates a URI from a scheme, host, and path. |
429 | /// |
430 | /// If the scheme is empty, we just return the path. |
431 | std::string CreateURI(StringPiece scheme, StringPiece host, |
432 | StringPiece path) const; |
433 | |
434 | /// \brief Creates a temporary file name with an extension. |
435 | std::string GetTempFilename(const std::string& extension) const; |
436 | |
437 | /// \brief Return true if path is absolute. |
438 | bool IsAbsolutePath(tsl::StringPiece path) const; |
439 | |
440 | #ifndef SWIG // variadic templates |
441 | /// \brief Join multiple paths together. |
442 | /// |
443 | /// This function also removes the unnecessary path separators. |
444 | /// For example: |
445 | /// |
446 | /// Arguments | JoinPath |
447 | /// ---------------------------+---------- |
448 | /// '/foo', 'bar' | /foo/bar |
449 | /// '/foo/', 'bar' | /foo/bar |
450 | /// '/foo', '/bar' | /foo/bar |
451 | /// |
452 | /// Usage: |
453 | /// string path = io::JoinPath("/mydir", filename); |
454 | /// string path = io::JoinPath(FLAGS_test_srcdir, filename); |
455 | /// string path = io::JoinPath("/full", "path", "to", "filename"); |
456 | template <typename... T> |
457 | std::string JoinPath(const T&... args) { |
458 | return JoinPathImpl({args...}); |
459 | } |
460 | #endif /* SWIG */ |
461 | |
462 | std::string JoinPathImpl(std::initializer_list<tsl::StringPiece> paths); |
463 | |
464 | /// \brief Populates the scheme, host, and path from a URI. |
465 | /// |
466 | /// scheme, host, and path are guaranteed by this function to point into the |
467 | /// contents of uri, even if empty. |
468 | /// |
469 | /// Corner cases: |
470 | /// - If the URI is invalid, scheme and host are set to empty strings and the |
471 | /// passed string is assumed to be a path |
472 | /// - If the URI omits the path (e.g. file://host), then the path is left |
473 | /// empty. |
474 | void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, |
475 | StringPiece* path) const; |
476 | |
477 | // Transaction related API |
478 | |
479 | /// \brief Starts a new transaction |
480 | virtual tsl::Status StartTransaction(TransactionToken** token) { |
481 | *token = nullptr; |
482 | return OkStatus(); |
483 | } |
484 | |
485 | /// \brief Adds `path` to transaction in `token` |
486 | virtual tsl::Status AddToTransaction(const std::string& path, |
487 | TransactionToken* token) { |
488 | return OkStatus(); |
489 | } |
490 | |
491 | /// \brief Ends transaction |
492 | virtual tsl::Status EndTransaction(TransactionToken* token) { |
493 | return OkStatus(); |
494 | } |
495 | |
496 | /// \brief Get token for `path` or start a new transaction and add `path` to |
497 | /// it. |
498 | virtual tsl::Status GetTokenOrStartTransaction(const std::string& path, |
499 | TransactionToken** token) { |
500 | *token = nullptr; |
501 | return OkStatus(); |
502 | } |
503 | |
504 | /// \brief Return transaction for `path` or nullptr in `token` |
505 | virtual tsl::Status GetTransactionForPath(const std::string& path, |
506 | TransactionToken** token) { |
507 | *token = nullptr; |
508 | return OkStatus(); |
509 | } |
510 | |
511 | /// \brief Decode transaction to human readable string. |
512 | virtual std::string DecodeTransaction(const TransactionToken* token); |
513 | |
514 | /// \brief Set File System Configuration Options |
515 | virtual Status SetOption(const string& key, const string& value) { |
516 | return errors::Unimplemented("SetOption" ); |
517 | } |
518 | |
519 | /// \brief Set File System Configuration Option |
520 | virtual tsl::Status SetOption(const std::string& name, |
521 | const std::vector<string>& values) { |
522 | return errors::Unimplemented("SetOption" ); |
523 | } |
524 | |
525 | /// \brief Set File System Configuration Option |
526 | virtual tsl::Status SetOption(const std::string& name, |
527 | const std::vector<int64_t>& values) { |
528 | return errors::Unimplemented("SetOption" ); |
529 | } |
530 | |
531 | /// \brief Set File System Configuration Option |
532 | virtual tsl::Status SetOption(const std::string& name, |
533 | const std::vector<double>& values) { |
534 | return errors::Unimplemented("SetOption" ); |
535 | } |
536 | |
537 | FileSystem() {} |
538 | |
539 | virtual ~FileSystem() = default; |
540 | }; |
/// Re-exports, into a class derived from FileSystem, the FileSystem methods
/// named in the list below.
///
/// A derived class that overrides only the token-taking overload of a method
/// hides the token-less overload of the same name. The using-declarations
/// below make those overloads reachable again, so that not every use location
/// has to migrate to the Transactional API. This is an interim solution until
/// the ModularFileSystem class becomes a singleton.
///
/// Use it inside the derived class body, followed by a semicolon; the
/// expansion itself does not end with one.
///
/// NOTE: FilesExist and CreateDir are not part of the list below.
// TODO(sami): Remove this macro when filesystem plugins migration is complete.
#define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \
  using FileSystem::NewRandomAccessFile;                      \
  using FileSystem::NewWritableFile;                          \
  using FileSystem::NewAppendableFile;                        \
  using FileSystem::NewReadOnlyMemoryRegionFromFile;          \
  using FileSystem::FileExists;                               \
  using FileSystem::GetChildren;                              \
  using FileSystem::GetMatchingPaths;                         \
  using FileSystem::Stat;                                     \
  using FileSystem::DeleteFile;                               \
  using FileSystem::RecursivelyCreateDir;                     \
  using FileSystem::DeleteDir;                                \
  using FileSystem::DeleteRecursively;                        \
  using FileSystem::GetFileSize;                              \
  using FileSystem::RenameFile;                               \
  using FileSystem::CopyFile;                                 \
  using FileSystem::IsDirectory;                              \
  using FileSystem::FlushCaches
565 | |
566 | /// A Wrapper class for Transactional FileSystem support. |
567 | /// This provides means to make use of the transactions with minimal code change |
568 | /// Any operations that are done through this interface will be through the |
569 | /// transaction created at the time of construction of this instance. |
570 | /// See FileSystem documentation for method descriptions. |
571 | /// This class simply forwards all calls to wrapped filesystem either with given |
572 | /// transaction token or with token used in its construction. This allows doing |
573 | /// transactional filesystem access with minimal code change. |
574 | class WrappedFileSystem : public FileSystem { |
575 | public: |
576 | TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; |
577 | |
578 | tsl::Status NewRandomAccessFile( |
579 | const std::string& fname, TransactionToken* token, |
580 | std::unique_ptr<RandomAccessFile>* result) override { |
581 | return fs_->NewRandomAccessFile(fname, (token ? token : token_), result); |
582 | } |
583 | |
584 | tsl::Status NewWritableFile(const std::string& fname, TransactionToken* token, |
585 | std::unique_ptr<WritableFile>* result) override { |
586 | return fs_->NewWritableFile(fname, (token ? token : token_), result); |
587 | } |
588 | |
589 | tsl::Status NewAppendableFile( |
590 | const std::string& fname, TransactionToken* token, |
591 | std::unique_ptr<WritableFile>* result) override { |
592 | return fs_->NewAppendableFile(fname, (token ? token : token_), result); |
593 | } |
594 | |
595 | tsl::Status NewReadOnlyMemoryRegionFromFile( |
596 | const std::string& fname, TransactionToken* token, |
597 | std::unique_ptr<ReadOnlyMemoryRegion>* result) override { |
598 | return fs_->NewReadOnlyMemoryRegionFromFile(fname, (token ? token : token_), |
599 | result); |
600 | } |
601 | |
602 | tsl::Status FileExists(const std::string& fname, |
603 | TransactionToken* token) override { |
604 | return fs_->FileExists(fname, (token ? token : token_)); |
605 | } |
606 | |
607 | bool FilesExist(const std::vector<string>& files, TransactionToken* token, |
608 | std::vector<Status>* status) override { |
609 | return fs_->FilesExist(files, (token ? token : token_), status); |
610 | } |
611 | |
612 | tsl::Status GetChildren(const std::string& dir, TransactionToken* token, |
613 | std::vector<string>* result) override { |
614 | return fs_->GetChildren(dir, (token ? token : token_), result); |
615 | } |
616 | |
617 | tsl::Status GetMatchingPaths(const std::string& pattern, |
618 | TransactionToken* token, |
619 | std::vector<string>* results) override { |
620 | return fs_->GetMatchingPaths(pattern, (token ? token : token_), results); |
621 | } |
622 | |
623 | bool Match(const std::string& filename, const std::string& pattern) override { |
624 | return fs_->Match(filename, pattern); |
625 | } |
626 | |
627 | tsl::Status Stat(const std::string& fname, TransactionToken* token, |
628 | FileStatistics* stat) override { |
629 | return fs_->Stat(fname, (token ? token : token_), stat); |
630 | } |
631 | |
632 | tsl::Status DeleteFile(const std::string& fname, |
633 | TransactionToken* token) override { |
634 | return fs_->DeleteFile(fname, (token ? token : token_)); |
635 | } |
636 | |
637 | tsl::Status CreateDir(const std::string& dirname, |
638 | TransactionToken* token) override { |
639 | return fs_->CreateDir(dirname, (token ? token : token_)); |
640 | } |
641 | |
642 | tsl::Status RecursivelyCreateDir(const std::string& dirname, |
643 | TransactionToken* token) override { |
644 | return fs_->RecursivelyCreateDir(dirname, (token ? token : token_)); |
645 | } |
646 | |
647 | tsl::Status DeleteDir(const std::string& dirname, |
648 | TransactionToken* token) override { |
649 | return fs_->DeleteDir(dirname, (token ? token : token_)); |
650 | } |
651 | |
652 | tsl::Status DeleteRecursively(const std::string& dirname, |
653 | TransactionToken* token, |
654 | int64_t* undeleted_files, |
655 | int64_t* undeleted_dirs) override { |
656 | return fs_->DeleteRecursively(dirname, (token ? token : token_), |
657 | undeleted_files, undeleted_dirs); |
658 | } |
659 | |
660 | tsl::Status GetFileSize(const std::string& fname, TransactionToken* token, |
661 | uint64* file_size) override { |
662 | return fs_->GetFileSize(fname, (token ? token : token_), file_size); |
663 | } |
664 | |
665 | tsl::Status RenameFile(const std::string& src, const std::string& target, |
666 | TransactionToken* token) override { |
667 | return fs_->RenameFile(src, target, (token ? token : token_)); |
668 | } |
669 | |
670 | tsl::Status CopyFile(const std::string& src, const std::string& target, |
671 | TransactionToken* token) override { |
672 | return fs_->CopyFile(src, target, (token ? token : token_)); |
673 | } |
674 | |
675 | std::string TranslateName(const std::string& name) const override { |
676 | return fs_->TranslateName(name); |
677 | } |
678 | |
679 | tsl::Status IsDirectory(const std::string& fname, |
680 | TransactionToken* token) override { |
681 | return fs_->IsDirectory(fname, (token ? token : token_)); |
682 | } |
683 | |
684 | Status HasAtomicMove(const std::string& path, |
685 | bool* has_atomic_move) override { |
686 | return fs_->HasAtomicMove(path, has_atomic_move); |
687 | } |
688 | |
689 | void FlushCaches(TransactionToken* token) override { |
690 | return fs_->FlushCaches((token ? token : token_)); |
691 | } |
692 | |
693 | char Separator() const override { return fs_->Separator(); } |
694 | |
695 | StringPiece Basename(StringPiece path) const override { |
696 | return fs_->Basename(path); |
697 | } |
698 | |
699 | tsl::Status StartTransaction(TransactionToken** token) override { |
700 | return fs_->StartTransaction(token); |
701 | } |
702 | |
703 | tsl::Status AddToTransaction(const std::string& path, |
704 | TransactionToken* token) override { |
705 | return fs_->AddToTransaction(path, (token ? token : token_)); |
706 | } |
707 | |
708 | tsl::Status EndTransaction(TransactionToken* token) override { |
709 | return fs_->EndTransaction(token); |
710 | } |
711 | |
712 | tsl::Status GetTransactionForPath(const std::string& path, |
713 | TransactionToken** token) override { |
714 | return fs_->GetTransactionForPath(path, token); |
715 | } |
716 | |
717 | tsl::Status GetTokenOrStartTransaction(const std::string& path, |
718 | TransactionToken** token) override { |
719 | return fs_->GetTokenOrStartTransaction(path, token); |
720 | } |
721 | |
722 | std::string DecodeTransaction(const TransactionToken* token) override { |
723 | return fs_->DecodeTransaction((token ? token : token_)); |
724 | } |
725 | |
726 | WrappedFileSystem(FileSystem* file_system, TransactionToken* token) |
727 | : fs_(file_system), token_(token) {} |
728 | |
729 | ~WrappedFileSystem() override = default; |
730 | |
731 | private: |
732 | FileSystem* fs_; |
733 | TransactionToken* token_; |
734 | }; |
735 | |
736 | /// A file abstraction for randomly reading the contents of a file. |
737 | class RandomAccessFile { |
738 | public: |
739 | RandomAccessFile() {} |
740 | virtual ~RandomAccessFile() = default; |
741 | |
742 | /// \brief Returns the name of the file. |
743 | /// |
744 | /// This is an optional operation that may not be implemented by every |
745 | /// filesystem. |
746 | virtual tsl::Status Name(StringPiece* result) const { |
747 | return errors::Unimplemented("This filesystem does not support Name()" ); |
748 | } |
749 | |
750 | /// \brief Reads up to `n` bytes from the file starting at `offset`. |
751 | /// |
752 | /// `scratch[0..n-1]` may be written by this routine. Sets `*result` |
753 | /// to the data that was read (including if fewer than `n` bytes were |
754 | /// successfully read). May set `*result` to point at data in |
755 | /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when |
756 | /// `*result` is used. |
757 | /// |
758 | /// On OK returned status: `n` bytes have been stored in `*result`. |
759 | /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`. |
760 | /// |
761 | /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result` |
762 | /// because of EOF. |
763 | /// |
764 | /// Safe for concurrent use by multiple threads. |
765 | virtual tsl::Status Read(uint64 offset, size_t n, StringPiece* result, |
766 | char* scratch) const = 0; |
767 | |
768 | #if defined(TF_CORD_SUPPORT) |
769 | /// \brief Read up to `n` bytes from the file starting at `offset`. |
770 | virtual tsl::Status Read(uint64 offset, size_t n, absl::Cord* cord) const { |
771 | return errors::Unimplemented( |
772 | "Read(uint64, size_t, absl::Cord*) is not " |
773 | "implemented" ); |
774 | } |
775 | #endif |
776 | |
777 | private: |
778 | TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile); |
779 | }; |
780 | |
781 | /// \brief A file abstraction for sequential writing. |
782 | /// |
783 | /// The implementation must provide buffering since callers may append |
784 | /// small fragments at a time to the file. |
785 | class WritableFile { |
786 | public: |
787 | WritableFile() {} |
788 | virtual ~WritableFile() = default; |
789 | |
790 | /// \brief Append 'data' to the file. |
791 | virtual tsl::Status Append(StringPiece data) = 0; |
792 | |
793 | #if defined(TF_CORD_SUPPORT) |
794 | // \brief Append 'data' to the file. |
795 | virtual tsl::Status Append(const absl::Cord& cord) { |
796 | for (StringPiece chunk : cord.Chunks()) { |
797 | TF_RETURN_IF_ERROR(Append(chunk)); |
798 | } |
799 | return OkStatus(); |
800 | } |
801 | #endif |
802 | |
803 | /// \brief Close the file. |
804 | /// |
805 | /// Flush() and de-allocate resources associated with this file |
806 | /// |
807 | /// Typical return codes (not guaranteed to be exhaustive): |
808 | /// * OK |
809 | /// * Other codes, as returned from Flush() |
810 | virtual tsl::Status Close() = 0; |
811 | |
812 | /// \brief Flushes the file and optionally syncs contents to filesystem. |
813 | /// |
814 | /// This should flush any local buffers whose contents have not been |
815 | /// delivered to the filesystem. |
816 | /// |
817 | /// If the process terminates after a successful flush, the contents |
818 | /// may still be persisted, since the underlying filesystem may |
819 | /// eventually flush the contents. If the OS or machine crashes |
820 | /// after a successful flush, the contents may or may not be |
821 | /// persisted, depending on the implementation. |
822 | virtual tsl::Status Flush() = 0; |
823 | |
824 | // \brief Returns the name of the file. |
825 | /// |
826 | /// This is an optional operation that may not be implemented by every |
827 | /// filesystem. |
828 | virtual tsl::Status Name(StringPiece* result) const { |
829 | return errors::Unimplemented("This filesystem does not support Name()" ); |
830 | } |
831 | |
832 | /// \brief Syncs contents of file to filesystem. |
833 | /// |
834 | /// This waits for confirmation from the filesystem that the contents |
835 | /// of the file have been persisted to the filesystem; if the OS |
836 | /// or machine crashes after a successful Sync, the contents should |
837 | /// be properly saved. |
838 | virtual tsl::Status Sync() = 0; |
839 | |
840 | /// \brief Retrieves the current write position in the file, or -1 on |
841 | /// error. |
842 | /// |
843 | /// This is an optional operation, subclasses may choose to return |
844 | /// errors::Unimplemented. |
845 | virtual tsl::Status Tell(int64_t* position) { |
846 | *position = -1; |
847 | return errors::Unimplemented("This filesystem does not support Tell()" ); |
848 | } |
849 | |
850 | private: |
851 | TF_DISALLOW_COPY_AND_ASSIGN(WritableFile); |
852 | }; |
853 | |
854 | /// \brief A readonly memmapped file abstraction. |
855 | /// |
856 | /// The implementation must guarantee that all memory is accessible when the |
857 | /// object exists, independently from the Env that created it. |
858 | class ReadOnlyMemoryRegion { |
859 | public: |
860 | ReadOnlyMemoryRegion() {} |
861 | virtual ~ReadOnlyMemoryRegion() = default; |
862 | |
863 | /// \brief Returns a pointer to the memory region. |
864 | virtual const void* data() = 0; |
865 | |
866 | /// \brief Returns the length of the memory region in bytes. |
867 | virtual uint64 length() = 0; |
868 | }; |
869 | |
870 | /// \brief A registry for file system implementations. |
871 | /// |
872 | /// Filenames are specified as an URI, which is of the form |
873 | /// [scheme://]<filename>. |
874 | /// File system implementations are registered using the REGISTER_FILE_SYSTEM |
875 | /// macro, providing the 'scheme' as the key. |
876 | /// |
877 | /// There are two `Register` methods: one using `Factory` for legacy filesystems |
878 | /// (deprecated mechanism of subclassing `FileSystem` and using |
879 | /// `REGISTER_FILE_SYSTEM` macro), and one using `std::unique_ptr<FileSystem>` |
880 | /// for the new modular approach. |
881 | /// |
882 | /// Note that the new API expects a pointer to `ModularFileSystem` but this is |
883 | /// not checked as there should be exactly one caller to the API and doing the |
884 | /// check results in a circular dependency between `BUILD` targets. |
885 | /// |
886 | /// Plan is to completely remove the filesystem registration from `Env` and |
887 | /// incorporate it into `ModularFileSystem` class (which will be renamed to be |
888 | /// the only `FileSystem` class and marked as `final`). But this will happen at |
889 | /// a later time, after we convert all filesystems to the new API. |
890 | /// |
891 | /// TODO(b/139060984): After all filesystems are converted, remove old |
892 | /// registration and update comment. |
893 | class FileSystemRegistry { |
894 | public: |
895 | typedef std::function<FileSystem*()> Factory; |
896 | |
897 | virtual ~FileSystemRegistry() = default; |
898 | virtual tsl::Status Register(const std::string& scheme, Factory factory) = 0; |
899 | virtual tsl::Status Register(const std::string& scheme, |
900 | std::unique_ptr<FileSystem> filesystem) = 0; |
901 | virtual FileSystem* Lookup(const std::string& scheme) = 0; |
902 | virtual tsl::Status GetRegisteredFileSystemSchemes( |
903 | std::vector<std::string>* schemes) = 0; |
904 | }; |
905 | |
906 | } // namespace tsl |
907 | |
908 | #endif // TENSORFLOW_TSL_PLATFORM_FILE_SYSTEM_H_ |
909 | |