1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/tsl/platform/file_system.h" |
17 | |
18 | #include <sys/stat.h> |
19 | |
20 | #include <algorithm> |
21 | #include <deque> |
22 | #include <string> |
23 | #include <utility> |
24 | #include <vector> |
25 | |
26 | #if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \ |
27 | defined(PLATFORM_GOOGLE) |
28 | #include <fnmatch.h> |
29 | #else |
30 | #include "tensorflow/tsl/platform/regexp.h" |
31 | #endif // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \ |
32 | // defined(PLATFORM_GOOGLE) |
33 | |
34 | #include "tensorflow/tsl/platform/env.h" |
35 | #include "tensorflow/tsl/platform/errors.h" |
36 | #include "tensorflow/tsl/platform/platform.h" |
37 | #include "tensorflow/tsl/platform/scanner.h" |
38 | #include "tensorflow/tsl/platform/str_util.h" |
39 | #include "tensorflow/tsl/platform/strcat.h" |
40 | |
41 | namespace tsl { |
42 | |
43 | bool FileSystem::Match(const string& filename, const string& pattern) { |
44 | #if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \ |
45 | defined(PLATFORM_GOOGLE) |
46 | // We avoid relying on RE2 on mobile platforms, because it incurs a |
47 | // significant binary size increase. |
48 | // For POSIX platforms, there is no need to depend on RE2 if `fnmatch` can be |
49 | // used safely. |
50 | return fnmatch(pattern.c_str(), filename.c_str(), FNM_PATHNAME) == 0; |
51 | #else |
52 | string regexp(pattern); |
53 | regexp = str_util::StringReplace(regexp, "*" , "[^/]*" , true); |
54 | regexp = str_util::StringReplace(regexp, "?" , "." , true); |
55 | regexp = str_util::StringReplace(regexp, "(" , "\\(" , true); |
56 | regexp = str_util::StringReplace(regexp, ")" , "\\)" , true); |
57 | return RE2::FullMatch(filename, regexp); |
58 | #endif // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) || \ |
59 | // defined(PLATFORM_GOOGLE) |
60 | } |
61 | |
62 | string FileSystem::TranslateName(const string& name) const { |
63 | // If the name is empty, CleanPath returns "." which is incorrect and |
64 | // we should return the empty path instead. |
65 | if (name.empty()) return name; |
66 | |
67 | // Otherwise, properly separate the URI components and clean the path one |
68 | StringPiece scheme, host, path; |
69 | this->ParseURI(name, &scheme, &host, &path); |
70 | |
71 | // If `path` becomes empty, return `/` (`file://` should be `/`), not `.`. |
72 | if (path.empty()) return "/" ; |
73 | |
74 | return this->CleanPath(path); |
75 | } |
76 | |
77 | Status FileSystem::IsDirectory(const string& name, TransactionToken* token) { |
78 | // Check if path exists. |
79 | // TODO(sami):Forward token to other methods once migration is complete. |
80 | TF_RETURN_IF_ERROR(FileExists(name)); |
81 | FileStatistics stat; |
82 | TF_RETURN_IF_ERROR(Stat(name, &stat)); |
83 | if (stat.is_directory) { |
84 | return OkStatus(); |
85 | } |
86 | return Status(tsl::error::FAILED_PRECONDITION, "Not a directory" ); |
87 | } |
88 | |
89 | Status FileSystem::HasAtomicMove(const string& path, bool* has_atomic_move) { |
90 | *has_atomic_move = true; |
91 | return OkStatus(); |
92 | } |
93 | |
94 | void FileSystem::FlushCaches(TransactionToken* token) {} |
95 | |
96 | bool FileSystem::FilesExist(const std::vector<string>& files, |
97 | TransactionToken* token, |
98 | std::vector<Status>* status) { |
99 | bool result = true; |
100 | for (const auto& file : files) { |
101 | Status s = FileExists(file); |
102 | result &= s.ok(); |
103 | if (status != nullptr) { |
104 | status->push_back(s); |
105 | } else if (!result) { |
106 | // Return early since there is no need to check other files. |
107 | return false; |
108 | } |
109 | } |
110 | return result; |
111 | } |
112 | |
113 | Status FileSystem::DeleteRecursively(const string& dirname, |
114 | TransactionToken* token, |
115 | int64_t* undeleted_files, |
116 | int64_t* undeleted_dirs) { |
117 | CHECK_NOTNULL(undeleted_files); |
118 | CHECK_NOTNULL(undeleted_dirs); |
119 | |
120 | *undeleted_files = 0; |
121 | *undeleted_dirs = 0; |
122 | // Make sure that dirname exists; |
123 | Status exists_status = FileExists(dirname); |
124 | if (!exists_status.ok()) { |
125 | (*undeleted_dirs)++; |
126 | return exists_status; |
127 | } |
128 | |
129 | // If given path to a single file, we should just delete it. |
130 | if (!IsDirectory(dirname).ok()) { |
131 | Status delete_root_status = DeleteFile(dirname); |
132 | if (!delete_root_status.ok()) (*undeleted_files)++; |
133 | return delete_root_status; |
134 | } |
135 | |
136 | std::deque<string> dir_q; // Queue for the BFS |
137 | std::vector<string> dir_list; // List of all dirs discovered |
138 | dir_q.push_back(dirname); |
139 | Status ret; // Status to be returned. |
140 | // Do a BFS on the directory to discover all the sub-directories. Remove all |
141 | // children that are files along the way. Then cleanup and remove the |
142 | // directories in reverse order.; |
143 | while (!dir_q.empty()) { |
144 | string dir = dir_q.front(); |
145 | dir_q.pop_front(); |
146 | dir_list.push_back(dir); |
147 | std::vector<string> children; |
148 | // GetChildren might fail if we don't have appropriate permissions. |
149 | Status s = GetChildren(dir, &children); |
150 | ret.Update(s); |
151 | if (!s.ok()) { |
152 | (*undeleted_dirs)++; |
153 | continue; |
154 | } |
155 | for (const string& child : children) { |
156 | const string child_path = this->JoinPath(dir, child); |
157 | // If the child is a directory add it to the queue, otherwise delete it. |
158 | if (IsDirectory(child_path).ok()) { |
159 | dir_q.push_back(child_path); |
160 | } else { |
161 | // Delete file might fail because of permissions issues or might be |
162 | // unimplemented. |
163 | Status del_status = DeleteFile(child_path); |
164 | ret.Update(del_status); |
165 | if (!del_status.ok()) { |
166 | (*undeleted_files)++; |
167 | } |
168 | } |
169 | } |
170 | } |
171 | // Now reverse the list of directories and delete them. The BFS ensures that |
172 | // we can delete the directories in this order. |
173 | std::reverse(dir_list.begin(), dir_list.end()); |
174 | for (const string& dir : dir_list) { |
175 | // Delete dir might fail because of permissions issues or might be |
176 | // unimplemented. |
177 | Status s = DeleteDir(dir); |
178 | ret.Update(s); |
179 | if (!s.ok()) { |
180 | (*undeleted_dirs)++; |
181 | } |
182 | } |
183 | return ret; |
184 | } |
185 | |
186 | Status FileSystem::RecursivelyCreateDir(const string& dirname, |
187 | TransactionToken* token) { |
188 | StringPiece scheme, host, remaining_dir; |
189 | this->ParseURI(dirname, &scheme, &host, &remaining_dir); |
190 | std::vector<StringPiece> sub_dirs; |
191 | while (!remaining_dir.empty()) { |
192 | std::string current_entry = this->CreateURI(scheme, host, remaining_dir); |
193 | Status exists_status = FileExists(current_entry); |
194 | if (exists_status.ok()) { |
195 | // FileExists cannot differentiate between existence of a file or a |
196 | // directory, hence we need an additional test as we must not assume that |
197 | // a path to a file is a path to a parent directory. |
198 | Status directory_status = IsDirectory(current_entry); |
199 | if (directory_status.ok()) { |
200 | break; // We need to start creating directories from here. |
201 | } else if (directory_status.code() == tsl::error::UNIMPLEMENTED) { |
202 | return directory_status; |
203 | } else { |
204 | return errors::FailedPrecondition(remaining_dir, " is not a directory" ); |
205 | } |
206 | } |
207 | if (exists_status.code() != error::Code::NOT_FOUND) { |
208 | return exists_status; |
209 | } |
210 | // Basename returns "" for / ending dirs. |
211 | if (!str_util::EndsWith(remaining_dir, "/" )) { |
212 | sub_dirs.push_back(this->Basename(remaining_dir)); |
213 | } |
214 | remaining_dir = this->Dirname(remaining_dir); |
215 | } |
216 | |
217 | // sub_dirs contains all the dirs to be created but in reverse order. |
218 | std::reverse(sub_dirs.begin(), sub_dirs.end()); |
219 | |
220 | // Now create the directories. |
221 | string built_path(remaining_dir); |
222 | for (const StringPiece sub_dir : sub_dirs) { |
223 | built_path = this->JoinPath(built_path, sub_dir); |
224 | Status status = CreateDir(this->CreateURI(scheme, host, built_path)); |
225 | if (!status.ok() && status.code() != tsl::error::ALREADY_EXISTS) { |
226 | return status; |
227 | } |
228 | } |
229 | return OkStatus(); |
230 | } |
231 | |
232 | Status FileSystem::CopyFile(const string& src, const string& target, |
233 | TransactionToken* token) { |
234 | return FileSystemCopyFile(this, src, this, target); |
235 | } |
236 | |
237 | char FileSystem::Separator() const { return '/'; } |
238 | |
239 | string FileSystem::JoinPathImpl(std::initializer_list<StringPiece> paths) { |
240 | string result; |
241 | |
242 | for (StringPiece path : paths) { |
243 | if (path.empty()) continue; |
244 | |
245 | if (result.empty()) { |
246 | result = string(path); |
247 | continue; |
248 | } |
249 | |
250 | if (result[result.size() - 1] == '/') { |
251 | if (this->IsAbsolutePath(path)) { |
252 | strings::StrAppend(&result, path.substr(1)); |
253 | } else { |
254 | strings::StrAppend(&result, path); |
255 | } |
256 | } else { |
257 | if (this->IsAbsolutePath(path)) { |
258 | strings::StrAppend(&result, path); |
259 | } else { |
260 | strings::StrAppend(&result, "/" , path); |
261 | } |
262 | } |
263 | } |
264 | |
265 | return result; |
266 | } |
267 | |
268 | std::pair<StringPiece, StringPiece> FileSystem::SplitPath( |
269 | StringPiece uri) const { |
270 | StringPiece scheme, host, path; |
271 | ParseURI(uri, &scheme, &host, &path); |
272 | |
273 | // We have 3 cases of results from `ParseURI`: |
274 | // |
275 | // 1. `path` is empty (`uri` is something like http://google.com/) |
276 | // Here, we don't have anything to split, so return empty components |
277 | // |
278 | // 2. all 3 components are non-empty (`uri` is something like |
279 | // http://google.com/path/to/resource) |
280 | // Here, all 3 components point to elements inside the same buffer as |
281 | // `uri`. In the given example, `scheme` contains `http://`, `host` |
282 | // contains `google.com/` and `path` contains `path/to/resource`. |
283 | // Since all 3 components point to the same buffer, we can do arithmetic |
284 | // such as `host.end() - uri.begin()` because we know for sure that |
285 | // `host` starts after `uri`. |
286 | // |
287 | // 3. `scheme` and `host` are empty (`uri` is local file, like /etc/passwd) |
288 | // Here, we split `path`, but we need to be careful with pointer |
289 | // arithmetic. Here we only know that `path` and `uri` represent the |
290 | // exact same buffer. |
291 | // |
292 | // To summarize, if `path` is empty there is nothing to return, in all other |
293 | // cases we can do arithmetic involving `path` and `uri` but if |
294 | // `host`/`scheme` are involved we need to make sure these are not empty. |
295 | |
296 | // Case 1 above |
297 | if (path.empty()) { |
298 | return std::make_pair(StringPiece(), StringPiece()); |
299 | } |
300 | |
301 | size_t pos = path.rfind(this->Separator()); |
302 | |
303 | // Our code assumes it is written for linux too many times. So, for windows |
304 | // also check for '/' |
305 | #ifdef PLATFORM_WINDOWS |
306 | size_t pos2 = path.rfind('/'); |
307 | // Pick the max value that is not string::npos. |
308 | if (pos == string::npos) { |
309 | pos = pos2; |
310 | } else { |
311 | if (pos2 != string::npos) { |
312 | pos = pos > pos2 ? pos : pos2; |
313 | } |
314 | } |
315 | #endif |
316 | |
317 | // Handle the case with no SEP in 'path'. |
318 | if (pos == StringPiece::npos) { |
319 | if (host.empty()) { |
320 | // Case 3 above, `uri` and `path` point to the same thing |
321 | // We are returning all of the `path` as basename here. |
322 | return std::make_pair(StringPiece(), path); |
323 | } |
324 | |
325 | // Safe to do this arithmetic here, we are in case 2 above |
326 | return std::make_pair(StringPiece(uri.data(), host.end() - uri.begin()), |
327 | path); |
328 | } |
329 | |
330 | // Handle the case with a single leading '/' in 'path'. |
331 | if (pos == 0) { |
332 | return std::make_pair( |
333 | StringPiece(uri.data(), path.begin() + 1 - uri.begin()), |
334 | StringPiece(path.data() + 1, path.size() - 1)); |
335 | } |
336 | |
337 | return std::make_pair( |
338 | StringPiece(uri.data(), path.begin() + pos - uri.begin()), |
339 | StringPiece(path.data() + pos + 1, path.size() - (pos + 1))); |
340 | } |
341 | |
342 | bool FileSystem::IsAbsolutePath(StringPiece path) const { |
343 | return !path.empty() && path[0] == '/'; |
344 | } |
345 | |
346 | StringPiece FileSystem::Dirname(StringPiece path) const { |
347 | return this->SplitPath(path).first; |
348 | } |
349 | |
350 | StringPiece FileSystem::Basename(StringPiece path) const { |
351 | return this->SplitPath(path).second; |
352 | } |
353 | |
354 | StringPiece FileSystem::Extension(StringPiece path) const { |
355 | StringPiece basename = this->Basename(path); |
356 | |
357 | size_t pos = basename.rfind('.'); |
358 | if (pos == StringPiece::npos) { |
359 | return StringPiece(path.data() + path.size(), 0); |
360 | } else { |
361 | return StringPiece(path.data() + pos + 1, path.size() - (pos + 1)); |
362 | } |
363 | } |
364 | |
365 | string FileSystem::CleanPath(StringPiece unclean_path) const { |
366 | string path(unclean_path); |
367 | const char* src = path.c_str(); |
368 | string::iterator dst = path.begin(); |
369 | |
370 | // Check for absolute path and determine initial backtrack limit. |
371 | const bool is_absolute_path = *src == '/'; |
372 | if (is_absolute_path) { |
373 | *dst++ = *src++; |
374 | while (*src == '/') ++src; |
375 | } |
376 | string::const_iterator backtrack_limit = dst; |
377 | |
378 | // Process all parts |
379 | while (*src) { |
380 | bool parsed = false; |
381 | |
382 | if (src[0] == '.') { |
383 | // 1dot ".<whateverisnext>", check for END or SEP. |
384 | if (src[1] == '/' || !src[1]) { |
385 | if (*++src) { |
386 | ++src; |
387 | } |
388 | parsed = true; |
389 | } else if (src[1] == '.' && (src[2] == '/' || !src[2])) { |
390 | // 2dot END or SEP (".." | "../<whateverisnext>"). |
391 | src += 2; |
392 | if (dst != backtrack_limit) { |
393 | // We can backtrack the previous part |
394 | for (--dst; dst != backtrack_limit && dst[-1] != '/'; --dst) { |
395 | // Empty. |
396 | } |
397 | } else if (!is_absolute_path) { |
398 | // Failed to backtrack and we can't skip it either. Rewind and copy. |
399 | src -= 2; |
400 | *dst++ = *src++; |
401 | *dst++ = *src++; |
402 | if (*src) { |
403 | *dst++ = *src; |
404 | } |
405 | // We can never backtrack over a copied "../" part so set new limit. |
406 | backtrack_limit = dst; |
407 | } |
408 | if (*src) { |
409 | ++src; |
410 | } |
411 | parsed = true; |
412 | } |
413 | } |
414 | |
415 | // If not parsed, copy entire part until the next SEP or EOS. |
416 | if (!parsed) { |
417 | while (*src && *src != '/') { |
418 | *dst++ = *src++; |
419 | } |
420 | if (*src) { |
421 | *dst++ = *src++; |
422 | } |
423 | } |
424 | |
425 | // Skip consecutive SEP occurrences |
426 | while (*src == '/') { |
427 | ++src; |
428 | } |
429 | } |
430 | |
431 | // Calculate and check the length of the cleaned path. |
432 | string::difference_type path_length = dst - path.begin(); |
433 | if (path_length != 0) { |
434 | // Remove trailing '/' except if it is root path ("/" ==> path_length := 1) |
435 | if (path_length > 1 && path[path_length - 1] == '/') { |
436 | --path_length; |
437 | } |
438 | path.resize(path_length); |
439 | } else { |
440 | // The cleaned path is empty; assign "." as per the spec. |
441 | path.assign(1, '.'); |
442 | } |
443 | return path; |
444 | } |
445 | |
446 | void FileSystem::ParseURI(StringPiece remaining, StringPiece* scheme, |
447 | StringPiece* host, StringPiece* path) const { |
448 | // 0. Parse scheme |
449 | // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]* |
450 | // TODO(keveman): Allow "+" and "-" in the scheme. |
451 | // Keep URI pattern in tensorboard/backend/server.py updated accordingly |
452 | if (!strings::Scanner(remaining) |
453 | .One(strings::Scanner::LETTER) |
454 | .Many(strings::Scanner::LETTER_DIGIT_DOT) |
455 | .StopCapture() |
456 | .OneLiteral("://" ) |
457 | .GetResult(&remaining, scheme)) { |
458 | // If there's no scheme, assume the entire string is a path. |
459 | *scheme = StringPiece(); |
460 | *host = StringPiece(); |
461 | *path = remaining; |
462 | return; |
463 | } |
464 | |
465 | // 1. Parse host |
466 | if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) { |
467 | // No path, so the rest of the URI is the host. |
468 | *host = remaining; |
469 | *path = StringPiece(); |
470 | return; |
471 | } |
472 | |
473 | // 2. The rest is the path |
474 | *path = remaining; |
475 | } |
476 | |
477 | string FileSystem::CreateURI(StringPiece scheme, StringPiece host, |
478 | StringPiece path) const { |
479 | if (scheme.empty()) { |
480 | return string(path); |
481 | } |
482 | return strings::StrCat(scheme, "://" , host, path); |
483 | } |
484 | |
485 | std::string FileSystem::DecodeTransaction(const TransactionToken* token) { |
486 | // TODO(sami): Switch using StrCat when void* is supported |
487 | if (token) { |
488 | std::stringstream oss; |
489 | oss << "Token= " << token->token << ", Owner=" << token->owner; |
490 | return oss.str(); |
491 | } |
492 | return "No Transaction" ; |
493 | } |
494 | |
495 | } // namespace tsl |
496 | |