1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
#include "tensorflow/tsl/platform/file_system.h"

#include <sys/stat.h>

#include <algorithm>
#include <deque>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \
    defined(PLATFORM_GOOGLE)
#include <fnmatch.h>
#else
#include "tensorflow/tsl/platform/regexp.h"
#endif  // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \
        // defined(PLATFORM_GOOGLE)

#include "tensorflow/tsl/platform/env.h"
#include "tensorflow/tsl/platform/errors.h"
#include "tensorflow/tsl/platform/platform.h"
#include "tensorflow/tsl/platform/scanner.h"
#include "tensorflow/tsl/platform/str_util.h"
#include "tensorflow/tsl/platform/strcat.h"
40
41namespace tsl {
42
43bool FileSystem::Match(const string& filename, const string& pattern) {
44#if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \
45 defined(PLATFORM_GOOGLE)
46 // We avoid relying on RE2 on mobile platforms, because it incurs a
47 // significant binary size increase.
48 // For POSIX platforms, there is no need to depend on RE2 if `fnmatch` can be
49 // used safely.
50 return fnmatch(pattern.c_str(), filename.c_str(), FNM_PATHNAME) == 0;
51#else
52 string regexp(pattern);
53 regexp = str_util::StringReplace(regexp, "*", "[^/]*", true);
54 regexp = str_util::StringReplace(regexp, "?", ".", true);
55 regexp = str_util::StringReplace(regexp, "(", "\\(", true);
56 regexp = str_util::StringReplace(regexp, ")", "\\)", true);
57 return RE2::FullMatch(filename, regexp);
58#endif // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \
59 // defined(PLATFORM_GOOGLE)
60}
61
62string FileSystem::TranslateName(const string& name) const {
63 // If the name is empty, CleanPath returns "." which is incorrect and
64 // we should return the empty path instead.
65 if (name.empty()) return name;
66
67 // Otherwise, properly separate the URI components and clean the path one
68 StringPiece scheme, host, path;
69 this->ParseURI(name, &scheme, &host, &path);
70
71 // If `path` becomes empty, return `/` (`file://` should be `/`), not `.`.
72 if (path.empty()) return "/";
73
74 return this->CleanPath(path);
75}
76
77Status FileSystem::IsDirectory(const string& name, TransactionToken* token) {
78 // Check if path exists.
79 // TODO(sami):Forward token to other methods once migration is complete.
80 TF_RETURN_IF_ERROR(FileExists(name));
81 FileStatistics stat;
82 TF_RETURN_IF_ERROR(Stat(name, &stat));
83 if (stat.is_directory) {
84 return OkStatus();
85 }
86 return Status(tsl::error::FAILED_PRECONDITION, "Not a directory");
87}
88
// Default implementation: unconditionally sets `*has_atomic_move` to true and
// returns OK. `path` is not consulted here, and `has_atomic_move` is
// dereferenced without a null check.
Status FileSystem::HasAtomicMove(const string& path, bool* has_atomic_move) {
  *has_atomic_move = true;
  return OkStatus();
}
93
// Default implementation is a no-op; `token` is ignored.
void FileSystem::FlushCaches(TransactionToken* token) {}
95
96bool FileSystem::FilesExist(const std::vector<string>& files,
97 TransactionToken* token,
98 std::vector<Status>* status) {
99 bool result = true;
100 for (const auto& file : files) {
101 Status s = FileExists(file);
102 result &= s.ok();
103 if (status != nullptr) {
104 status->push_back(s);
105 } else if (!result) {
106 // Return early since there is no need to check other files.
107 return false;
108 }
109 }
110 return result;
111}
112
113Status FileSystem::DeleteRecursively(const string& dirname,
114 TransactionToken* token,
115 int64_t* undeleted_files,
116 int64_t* undeleted_dirs) {
117 CHECK_NOTNULL(undeleted_files);
118 CHECK_NOTNULL(undeleted_dirs);
119
120 *undeleted_files = 0;
121 *undeleted_dirs = 0;
122 // Make sure that dirname exists;
123 Status exists_status = FileExists(dirname);
124 if (!exists_status.ok()) {
125 (*undeleted_dirs)++;
126 return exists_status;
127 }
128
129 // If given path to a single file, we should just delete it.
130 if (!IsDirectory(dirname).ok()) {
131 Status delete_root_status = DeleteFile(dirname);
132 if (!delete_root_status.ok()) (*undeleted_files)++;
133 return delete_root_status;
134 }
135
136 std::deque<string> dir_q; // Queue for the BFS
137 std::vector<string> dir_list; // List of all dirs discovered
138 dir_q.push_back(dirname);
139 Status ret; // Status to be returned.
140 // Do a BFS on the directory to discover all the sub-directories. Remove all
141 // children that are files along the way. Then cleanup and remove the
142 // directories in reverse order.;
143 while (!dir_q.empty()) {
144 string dir = dir_q.front();
145 dir_q.pop_front();
146 dir_list.push_back(dir);
147 std::vector<string> children;
148 // GetChildren might fail if we don't have appropriate permissions.
149 Status s = GetChildren(dir, &children);
150 ret.Update(s);
151 if (!s.ok()) {
152 (*undeleted_dirs)++;
153 continue;
154 }
155 for (const string& child : children) {
156 const string child_path = this->JoinPath(dir, child);
157 // If the child is a directory add it to the queue, otherwise delete it.
158 if (IsDirectory(child_path).ok()) {
159 dir_q.push_back(child_path);
160 } else {
161 // Delete file might fail because of permissions issues or might be
162 // unimplemented.
163 Status del_status = DeleteFile(child_path);
164 ret.Update(del_status);
165 if (!del_status.ok()) {
166 (*undeleted_files)++;
167 }
168 }
169 }
170 }
171 // Now reverse the list of directories and delete them. The BFS ensures that
172 // we can delete the directories in this order.
173 std::reverse(dir_list.begin(), dir_list.end());
174 for (const string& dir : dir_list) {
175 // Delete dir might fail because of permissions issues or might be
176 // unimplemented.
177 Status s = DeleteDir(dir);
178 ret.Update(s);
179 if (!s.ok()) {
180 (*undeleted_dirs)++;
181 }
182 }
183 return ret;
184}
185
186Status FileSystem::RecursivelyCreateDir(const string& dirname,
187 TransactionToken* token) {
188 StringPiece scheme, host, remaining_dir;
189 this->ParseURI(dirname, &scheme, &host, &remaining_dir);
190 std::vector<StringPiece> sub_dirs;
191 while (!remaining_dir.empty()) {
192 std::string current_entry = this->CreateURI(scheme, host, remaining_dir);
193 Status exists_status = FileExists(current_entry);
194 if (exists_status.ok()) {
195 // FileExists cannot differentiate between existence of a file or a
196 // directory, hence we need an additional test as we must not assume that
197 // a path to a file is a path to a parent directory.
198 Status directory_status = IsDirectory(current_entry);
199 if (directory_status.ok()) {
200 break; // We need to start creating directories from here.
201 } else if (directory_status.code() == tsl::error::UNIMPLEMENTED) {
202 return directory_status;
203 } else {
204 return errors::FailedPrecondition(remaining_dir, " is not a directory");
205 }
206 }
207 if (exists_status.code() != error::Code::NOT_FOUND) {
208 return exists_status;
209 }
210 // Basename returns "" for / ending dirs.
211 if (!str_util::EndsWith(remaining_dir, "/")) {
212 sub_dirs.push_back(this->Basename(remaining_dir));
213 }
214 remaining_dir = this->Dirname(remaining_dir);
215 }
216
217 // sub_dirs contains all the dirs to be created but in reverse order.
218 std::reverse(sub_dirs.begin(), sub_dirs.end());
219
220 // Now create the directories.
221 string built_path(remaining_dir);
222 for (const StringPiece sub_dir : sub_dirs) {
223 built_path = this->JoinPath(built_path, sub_dir);
224 Status status = CreateDir(this->CreateURI(scheme, host, built_path));
225 if (!status.ok() && status.code() != tsl::error::ALREADY_EXISTS) {
226 return status;
227 }
228 }
229 return OkStatus();
230}
231
// Copies `src` to `target` by delegating to `FileSystemCopyFile`, passing this
// filesystem as both the source and the destination filesystem. `token` is
// not forwarded.
Status FileSystem::CopyFile(const string& src, const string& target,
                            TransactionToken* token) {
  return FileSystemCopyFile(this, src, this, target);
}
236
// Path separator reported by this base implementation: always '/'.
char FileSystem::Separator() const { return '/'; }
238
239string FileSystem::JoinPathImpl(std::initializer_list<StringPiece> paths) {
240 string result;
241
242 for (StringPiece path : paths) {
243 if (path.empty()) continue;
244
245 if (result.empty()) {
246 result = string(path);
247 continue;
248 }
249
250 if (result[result.size() - 1] == '/') {
251 if (this->IsAbsolutePath(path)) {
252 strings::StrAppend(&result, path.substr(1));
253 } else {
254 strings::StrAppend(&result, path);
255 }
256 } else {
257 if (this->IsAbsolutePath(path)) {
258 strings::StrAppend(&result, path);
259 } else {
260 strings::StrAppend(&result, "/", path);
261 }
262 }
263 }
264
265 return result;
266}
267
// Splits `uri` into a (dirname, basename) pair at the last separator of its
// path component. Both returned pieces are views into the buffer of `uri`
// (or default-constructed, empty views); nothing is copied. The dirname keeps
// any scheme/host prefix of `uri`.
std::pair<StringPiece, StringPiece> FileSystem::SplitPath(
    StringPiece uri) const {
  StringPiece scheme, host, path;
  ParseURI(uri, &scheme, &host, &path);

  // We have 3 cases of results from `ParseURI`:
  //
  //   1. `path` is empty (as `ParseURI` in this file is written, that is a URI
  //      with no '/' after the host, something like http://google.com, or an
  //      empty `uri`).
  //      Here, we don't have anything to split, so return empty components.
  //
  //   2. all 3 components are non-empty (`uri` is something like
  //      http://google.com/path/to/resource)
  //      Here, all 3 components point to elements inside the same buffer as
  //      `uri`. As `ParseURI` in this file is written, for the given example
  //      `scheme` is `http` (the `://` is consumed but not captured), `host`
  //      is `google.com` and `path` is `/path/to/resource` (it keeps its
  //      leading '/').
  //      Since all 3 components point to the same buffer, we can do arithmetic
  //      such as `host.end() - uri.begin()` because we know for sure that
  //      `host` starts after `uri`.
  //
  //   3. `scheme` and `host` are empty (`uri` is local file, like /etc/passwd)
  //      Here, we split `path`, but we need to be careful with pointer
  //      arithmetic. Here we only know that `path` and `uri` represent the
  //      exact same buffer.
  //
  // To summarize, if `path` is empty there is nothing to return, in all other
  // cases we can do arithmetic involving `path` and `uri` but if
  // `host`/`scheme` are involved we need to make sure these are not empty.

  // Case 1 above
  if (path.empty()) {
    return std::make_pair(StringPiece(), StringPiece());
  }

  size_t pos = path.rfind(this->Separator());

  // Our code assumes it is written for linux too many times. So, for windows
  // also check for '/'
#ifdef PLATFORM_WINDOWS
  size_t pos2 = path.rfind('/');
  // Pick the max value that is not string::npos.
  if (pos == string::npos) {
    pos = pos2;
  } else {
    if (pos2 != string::npos) {
      pos = pos > pos2 ? pos : pos2;
    }
  }
#endif

  // Handle the case with no SEP in 'path'.
  if (pos == StringPiece::npos) {
    if (host.empty()) {
      // Case 3 above, `uri` and `path` point to the same thing
      // We are returning all of the `path` as basename here.
      return std::make_pair(StringPiece(), path);
    }

    // Safe to do this arithmetic here, we are in case 2 above.
    // The dirname is everything in `uri` up to the end of `host`.
    return std::make_pair(StringPiece(uri.data(), host.end() - uri.begin()),
                          path);
  }

  // Handle the case with a single leading '/' in 'path'.
  // The dirname keeps that '/' (so "/foo" splits into "/" and "foo"), along
  // with whatever precedes `path` inside `uri`.
  if (pos == 0) {
    return std::make_pair(
        StringPiece(uri.data(), path.begin() + 1 - uri.begin()),
        StringPiece(path.data() + 1, path.size() - 1));
  }

  // General case: the dirname is everything in `uri` before the last
  // separator, the basename is everything after it; the separator itself
  // belongs to neither.
  return std::make_pair(
      StringPiece(uri.data(), path.begin() + pos - uri.begin()),
      StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
}
341
342bool FileSystem::IsAbsolutePath(StringPiece path) const {
343 return !path.empty() && path[0] == '/';
344}
345
346StringPiece FileSystem::Dirname(StringPiece path) const {
347 return this->SplitPath(path).first;
348}
349
350StringPiece FileSystem::Basename(StringPiece path) const {
351 return this->SplitPath(path).second;
352}
353
354StringPiece FileSystem::Extension(StringPiece path) const {
355 StringPiece basename = this->Basename(path);
356
357 size_t pos = basename.rfind('.');
358 if (pos == StringPiece::npos) {
359 return StringPiece(path.data() + path.size(), 0);
360 } else {
361 return StringPiece(path.data() + pos + 1, path.size() - (pos + 1));
362 }
363}
364
// Returns a lexically normalized copy of `unclean_path`. No filesystem access
// is visible in this function; the work is purely textual:
//   * runs of '/' are collapsed into one,
//   * "." components are dropped,
//   * ".." components remove the preceding component where there is one; on a
//     relative path, ".." components that cannot be resolved are kept, while
//     on an absolute path they are discarded,
//   * a trailing '/' is removed unless the result is just "/",
//   * an empty result becomes ".".
//
// The rewrite happens in place inside a copy of the input: `src` is the read
// cursor and `dst` the write cursor over the same string. Scanning is driven
// by the NUL terminator of `c_str()`, so processing stops at the first NUL
// character.
string FileSystem::CleanPath(StringPiece unclean_path) const {
  string path(unclean_path);
  const char* src = path.c_str();
  string::iterator dst = path.begin();

  // Check for absolute path and determine initial backtrack limit.
  const bool is_absolute_path = *src == '/';
  if (is_absolute_path) {
    // Keep exactly one leading '/' and skip any further ones.
    *dst++ = *src++;
    while (*src == '/') ++src;
  }
  // `dst` is never moved back past this position when resolving "..".
  string::const_iterator backtrack_limit = dst;

  // Process all parts
  while (*src) {
    bool parsed = false;

    if (src[0] == '.') {
      // 1dot ".<whateverisnext>", check for END or SEP.
      if (src[1] == '/' || !src[1]) {
        // Skip the '.', and the '/' following it if there is one; nothing is
        // written to the output.
        if (*++src) {
          ++src;
        }
        parsed = true;
      } else if (src[1] == '.' && (src[2] == '/' || !src[2])) {
        // 2dot END or SEP (".." | "../<whateverisnext>").
        src += 2;
        if (dst != backtrack_limit) {
          // We can backtrack the previous part
          // Step over the '/' that ends the previous part, then move back to
          // just after the '/' before it (or to the backtrack limit).
          for (--dst; dst != backtrack_limit && dst[-1] != '/'; --dst) {
            // Empty.
          }
        } else if (!is_absolute_path) {
          // Failed to backtrack and we can't skip it either. Rewind and copy.
          src -= 2;
          *dst++ = *src++;
          *dst++ = *src++;
          if (*src) {
            *dst++ = *src;
          }
          // We can never backtrack over a copied "../" part so set new limit.
          backtrack_limit = dst;
        }
        // Consume the '/' that followed "..", if any. (When nothing could be
        // backtracked on an absolute path, the ".." is simply dropped.)
        if (*src) {
          ++src;
        }
        parsed = true;
      }
    }

    // If not parsed, copy entire part until the next SEP or EOS.
    if (!parsed) {
      while (*src && *src != '/') {
        *dst++ = *src++;
      }
      // Copy the terminating '/' as well, if there is one.
      if (*src) {
        *dst++ = *src++;
      }
    }

    // Skip consecutive SEP occurrences
    while (*src == '/') {
      ++src;
    }
  }

  // Calculate and check the length of the cleaned path.
  string::difference_type path_length = dst - path.begin();
  if (path_length != 0) {
    // Remove trailing '/' except if it is root path ("/" ==> path_length := 1)
    if (path_length > 1 && path[path_length - 1] == '/') {
      --path_length;
    }
    // Truncate the buffer to the part that was actually written.
    path.resize(path_length);
  } else {
    // The cleaned path is empty; assign "." as per the spec.
    path.assign(1, '.');
  }
  return path;
}
445
446void FileSystem::ParseURI(StringPiece remaining, StringPiece* scheme,
447 StringPiece* host, StringPiece* path) const {
448 // 0. Parse scheme
449 // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
450 // TODO(keveman): Allow "+" and "-" in the scheme.
451 // Keep URI pattern in tensorboard/backend/server.py updated accordingly
452 if (!strings::Scanner(remaining)
453 .One(strings::Scanner::LETTER)
454 .Many(strings::Scanner::LETTER_DIGIT_DOT)
455 .StopCapture()
456 .OneLiteral("://")
457 .GetResult(&remaining, scheme)) {
458 // If there's no scheme, assume the entire string is a path.
459 *scheme = StringPiece();
460 *host = StringPiece();
461 *path = remaining;
462 return;
463 }
464
465 // 1. Parse host
466 if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) {
467 // No path, so the rest of the URI is the host.
468 *host = remaining;
469 *path = StringPiece();
470 return;
471 }
472
473 // 2. The rest is the path
474 *path = remaining;
475}
476
477string FileSystem::CreateURI(StringPiece scheme, StringPiece host,
478 StringPiece path) const {
479 if (scheme.empty()) {
480 return string(path);
481 }
482 return strings::StrCat(scheme, "://", host, path);
483}
484
485std::string FileSystem::DecodeTransaction(const TransactionToken* token) {
486 // TODO(sami): Switch using StrCat when void* is supported
487 if (token) {
488 std::stringstream oss;
489 oss << "Token= " << token->token << ", Owner=" << token->owner;
490 return oss.str();
491 }
492 return "No Transaction";
493}
494
495} // namespace tsl
496