file_system.cc source code [tensorflow/tensorflow/tsl/platform/file_system.cc]

1	/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3	Licensed under the Apache License, Version 2.0 (the "License");
4	you may not use this file except in compliance with the License.
5	You may obtain a copy of the License at
6
7	http://www.apache.org/licenses/LICENSE-2.0
8
9	Unless required by applicable law or agreed to in writing, software
10	distributed under the License is distributed on an "AS IS" BASIS,
11	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	See the License for the specific language governing permissions and
13	limitations under the License.
14	==============================================================================*/
15
16	#include "tensorflow/tsl/platform/file_system.h"
17
18	#include <sys/stat.h>
19
20	#include <algorithm>
21	#include <deque>
22	#include <string>
23	#include <utility>
24	#include <vector>
25
26	#if defined(PLATFORM_POSIX) \|\| defined(IS_MOBILE_PLATFORM) \|\| \
27	defined(PLATFORM_GOOGLE)
28	#include <fnmatch.h>
29	#else
30	#include "tensorflow/tsl/platform/regexp.h"
31	#endif // defined(PLATFORM_POSIX) \|\| defined(IS_MOBILE_PLATFORM) \|\| \
32	// defined(PLATFORM_GOOGLE)
33
34	#include "tensorflow/tsl/platform/env.h"
35	#include "tensorflow/tsl/platform/errors.h"
36	#include "tensorflow/tsl/platform/platform.h"
37	#include "tensorflow/tsl/platform/scanner.h"
38	#include "tensorflow/tsl/platform/str_util.h"
39	#include "tensorflow/tsl/platform/strcat.h"
40
41	namespace tsl {
42
43	bool FileSystem::Match(const string& filename, const string& pattern) {
44	#if defined(PLATFORM_POSIX) \|\| defined(IS_MOBILE_PLATFORM) \|\| \
45	defined(PLATFORM_GOOGLE)
46	// We avoid relying on RE2 on mobile platforms, because it incurs a
47	// significant binary size increase.
48	// For POSIX platforms, there is no need to depend on RE2 if `fnmatch` can be
49	// used safely.
50	return fnmatch(pattern.c_str(), filename.c_str(), FNM_PATHNAME) == `0`;
51	#else
52	string regexp(pattern);
53	regexp = str_util::StringReplace(regexp, "", "[^/]", true);
54	regexp = str_util::StringReplace(regexp, "?", ".", true);
55	regexp = str_util::StringReplace(regexp, "(", "\\(", true);
56	regexp = str_util::StringReplace(regexp, ")", "\\)", true);
57	return RE2::FullMatch(filename, regexp);
58	#endif // defined(PLATFORM_POSIX) \|\| defined(IS_MOBILE_PLATFORM) \|\| \
59	// defined(PLATFORM_GOOGLE)
60	}
61
62	string FileSystem::TranslateName(const string& name) const {
63	// If the name is empty, CleanPath returns "." which is incorrect and
64	// we should return the empty path instead.
65	if (name.empty()) return name;
66
67	// Otherwise, properly separate the URI components and clean the path one
68	StringPiece scheme, host, path;
69	this->ParseURI(name, &scheme, &host, &path);
70
71	// If `path` becomes empty, return `/` (`file://` should be `/`), not `.`.
72	if (path.empty()) return "/";
73
74	return this->CleanPath(path);
75	}
76
77	Status FileSystem::IsDirectory(const string& name, TransactionToken* token) {
78	// Check if path exists.
79	// TODO(sami):Forward token to other methods once migration is complete.
80	TF_RETURN_IF_ERROR(FileExists(name));
81	FileStatistics stat;
82	TF_RETURN_IF_ERROR(Stat(name, &stat));
83	if (stat.is_directory) {
84	return OkStatus();
85	}
86	return Status (tsl::error::FAILED_PRECONDITION, "Not a directory");
87	}
88
89	Status FileSystem::HasAtomicMove(const string& path, bool* has_atomic_move) {
90	has_atomic_move = true*;
91	return OkStatus();
92	}
93
94	void FileSystem::FlushCaches(TransactionToken* token) {}
95
96	bool FileSystem::FilesExist(const std::vector<string>& files,
97	TransactionToken* token,
98	std::vector<Status>* status) {
99	bool result = true;
100	for (const auto& file : files) {
101	Status s = FileExists(file);
102	result &= s.ok();
103	if (status != nullptr) {
104	status->push_back(s);
105	} else if (!result) {
106	// Return early since there is no need to check other files.
107	return false;
108	}
109	}
110	return result;
111	}
112
113	Status FileSystem::DeleteRecursively(const string& dirname,
114	TransactionToken* token,
115	int64_t* undeleted_files,
116	int64_t* undeleted_dirs) {
117	CHECK_NOTNULL(undeleted_files);
118	CHECK_NOTNULL(undeleted_dirs);
119
120	*undeleted_files = `0`;
121	*undeleted_dirs = `0`;
122	// Make sure that dirname exists;
123	Status exists_status = FileExists(dirname);
124	if (!exists_status.ok()) {
125	(*undeleted_dirs)++;
126	return exists_status;
127	}
128
129	// If given path to a single file, we should just delete it.
130	if (!IsDirectory(dirname).ok()) {
131	Status delete_root_status = DeleteFile(dirname);
132	if (!delete_root_status.ok()) (*undeleted_files)++;
133	return delete_root_status;
134	}
135
136	std::deque<string> dir_q; // Queue for the BFS
137	std::vector<string> dir_list; // List of all dirs discovered
138	dir_q.push_back(dirname);
139	Status ret; // Status to be returned.
140	// Do a BFS on the directory to discover all the sub-directories. Remove all
141	// children that are files along the way. Then cleanup and remove the
142	// directories in reverse order.;
143	while (!dir_q.empty()) {
144	string dir = dir_q.front();
145	dir_q.pop_front();
146	dir_list.push_back(dir);
147	std::vector<string> children;
148	// GetChildren might fail if we don't have appropriate permissions.
149	Status s = GetChildren(dir, &children);
150	ret.Update(s);
151	if (!s.ok()) {
152	(*undeleted_dirs)++;
153	continue;
154	}
155	for (const string& child : children) {
156	const string child_path = this->JoinPath(dir, child);
157	// If the child is a directory add it to the queue, otherwise delete it.
158	if (IsDirectory(child_path).ok()) {
159	dir_q.push_back(child_path);
160	} else {
161	// Delete file might fail because of permissions issues or might be
162	// unimplemented.
163	Status del_status = DeleteFile(child_path);
164	ret.Update(del_status);
165	if (!del_status.ok()) {
166	(*undeleted_files)++;
167	}
168	}
169	}
170	}
171	// Now reverse the list of directories and delete them. The BFS ensures that
172	// we can delete the directories in this order.
173	std::reverse(dir_list.begin(), dir_list.end());
174	for (const string& dir : dir_list) {
175	// Delete dir might fail because of permissions issues or might be
176	// unimplemented.
177	Status s = DeleteDir(dir);
178	ret.Update(s);
179	if (!s.ok()) {
180	(*undeleted_dirs)++;
181	}
182	}
183	return ret;
184	}
185
186	Status FileSystem::RecursivelyCreateDir(const string& dirname,
187	TransactionToken* token) {
188	StringPiece scheme, host, remaining_dir;
189	this->ParseURI(dirname, &scheme, &host, &remaining_dir);
190	std::vector<StringPiece> sub_dirs;
191	while (!remaining_dir.empty()) {
192	std::string current_entry = this->CreateURI(scheme, host, remaining_dir);
193	Status exists_status = FileExists(current_entry);
194	if (exists_status.ok()) {
195	// FileExists cannot differentiate between existence of a file or a
196	// directory, hence we need an additional test as we must not assume that
197	// a path to a file is a path to a parent directory.
198	Status directory_status = IsDirectory(current_entry);
199	if (directory_status.ok()) {
200	break; // We need to start creating directories from here.
201	} else if (directory_status.code() == tsl::error::UNIMPLEMENTED) {
202	return directory_status;
203	} else {
204	return errors::FailedPrecondition(remaining_dir, " is not a directory");
205	}
206	}
207	if (exists_status.code() != error::Code::NOT_FOUND) {
208	return exists_status;
209	}
210	// Basename returns "" for / ending dirs.
211	if (!str_util::EndsWith(remaining_dir, "/")) {
212	sub_dirs.push_back(this->Basename(remaining_dir));
213	}
214	remaining_dir = this->Dirname(remaining_dir);
215	}
216
217	// sub_dirs contains all the dirs to be created but in reverse order.
218	std::reverse(sub_dirs.begin(), sub_dirs.end());
219
220	// Now create the directories.
221	string built_path(remaining_dir);
222	for (const StringPiece sub_dir : sub_dirs) {
223	built_path = this->JoinPath(built_path, sub_dir);
224	Status status = CreateDir(this->CreateURI(scheme, host, built_path));
225	if (!status.ok() && status.code() != tsl::error::ALREADY_EXISTS) {
226	return status;
227	}
228	}
229	return OkStatus();
230	}
231
232	Status FileSystem::CopyFile(const string& src, const string& target,
233	TransactionToken* token) {
234	return FileSystemCopyFile(this, src, this, target);
235	}
236
237	char FileSystem::Separator() const { return `'/'`; }
238
239	string FileSystem::JoinPathImpl(std::initializer_list<StringPiece> paths) {
240	string result;
241
242	for (StringPiece path : paths) {
243	if (path.empty()) continue;
244
245	if (result.empty()) {
246	result = string (path);
247	continue;
248	}
249
250	if (result [result.size() - `1`] == `'/'`) {
251	if (this->IsAbsolutePath(path)) {
252	strings::StrAppend(&result, path.substr(`1`));
253	} else {
254	strings::StrAppend(&result, path);
255	}
256	} else {
257	if (this->IsAbsolutePath(path)) {
258	strings::StrAppend(&result, path);
259	} else {
260	strings::StrAppend(&result, "/", path);
261	}
262	}
263	}
264
265	return result;
266	}
267
268	std::pair<StringPiece, StringPiece> FileSystem::SplitPath(
269	StringPiece uri) const {
270	StringPiece scheme, host, path;
271	ParseURI(uri, &scheme, &host, &path);
272
273	// We have 3 cases of results from `ParseURI`:
274	//
275	// 1. `path` is empty (`uri` is something like http://google.com/)
276	// Here, we don't have anything to split, so return empty components
277	//
278	// 2. all 3 components are non-empty (`uri` is something like
279	// http://google.com/path/to/resource)
280	// Here, all 3 components point to elements inside the same buffer as
281	// `uri`. In the given example, `scheme` contains `http://`, `host`
282	// contains `google.com/` and `path` contains `path/to/resource`.
283	// Since all 3 components point to the same buffer, we can do arithmetic
284	// such as `host.end() - uri.begin()` because we know for sure that
285	// `host` starts after `uri`.
286	//
287	// 3. `scheme` and `host` are empty (`uri` is local file, like /etc/passwd)
288	// Here, we split `path`, but we need to be careful with pointer
289	// arithmetic. Here we only know that `path` and `uri` represent the
290	// exact same buffer.
291	//
292	// To summarize, if `path` is empty there is nothing to return, in all other
293	// cases we can do arithmetic involving `path` and `uri` but if
294	// `host`/`scheme` are involved we need to make sure these are not empty.
295
296	// Case 1 above
297	if (path.empty()) {
298	return std::make_pair(StringPiece (), StringPiece ());
299	}
300
301	size_t pos = path.rfind(this->Separator());
302
303	// Our code assumes it is written for linux too many times. So, for windows
304	// also check for '/'
305	#ifdef PLATFORM_WINDOWS
306	size_t pos2 = path.rfind(`'/'`);
307	// Pick the max value that is not string::npos.
308	if (pos == string::npos) {
309	pos = pos2;
310	} else {
311	if (pos2 != string::npos) {
312	pos = pos > pos2 ? pos : pos2;
313	}
314	}
315	#endif
316
317	// Handle the case with no SEP in 'path'.
318	if (pos == StringPiece::npos) {
319	if (host.empty()) {
320	// Case 3 above, `uri` and `path` point to the same thing
321	// We are returning all of the `path` as basename here.
322	return std::make_pair(StringPiece (), path);
323	}
324
325	// Safe to do this arithmetic here, we are in case 2 above
326	return std::make_pair(StringPiece (uri.data(), host.end() - uri.begin()),
327	path);
328	}
329
330	// Handle the case with a single leading '/' in 'path'.
331	if (pos == `0`) {
332	return std::make_pair(
333	StringPiece (uri.data(), path.begin() + `1` - uri.begin()),
334	StringPiece (path.data() + `1`, path.size() - `1`));
335	}
336
337	return std::make_pair(
338	StringPiece (uri.data(), path.begin() + pos - uri.begin()),
339	StringPiece (path.data() + pos + `1`, path.size() - (pos + `1`)));
340	}
341
342	bool FileSystem::IsAbsolutePath(StringPiece path) const {
343	return !path.empty() && path [`0`] == `'/'`;
344	}
345
346	StringPiece FileSystem::Dirname(StringPiece path) const {
347	return this->SplitPath(path).first;
348	}
349
350	StringPiece FileSystem::Basename(StringPiece path) const {
351	return this->SplitPath(path).second;
352	}
353
354	StringPiece FileSystem::Extension(StringPiece path) const {
355	StringPiece basename = this->Basename(path);
356
357	size_t pos = basename.rfind(`'.'`);
358	if (pos == StringPiece::npos) {
359	return StringPiece (path.data() + path.size(), `0`);
360	} else {
361	return StringPiece (path.data() + pos + `1`, path.size() - (pos + `1`));
362	}
363	}
364
365	string FileSystem::CleanPath(StringPiece unclean_path) const {
366	string path(unclean_path);
367	const char* src = path.c_str();
368	string::iterator dst = path.begin();
369
370	// Check for absolute path and determine initial backtrack limit.
371	const bool is_absolute_path = *src == `'/'`;
372	if (is_absolute_path) {
373	dst ++ = src++;
374	while (*src == `'/'`) ++src;
375	}
376	string::const_iterator backtrack_limit = dst;
377
378	// Process all parts
379	while (*src) {
380	bool parsed = false;
381
382	if (src[`0`] == `'.'`) {
383	// 1dot ".<whateverisnext>", check for END or SEP.
384	if (src[`1`] == `'/'` \|\| !src[`1`]) {
385	if (*++src) {
386	++src;
387	}
388	parsed = true;
389	} else if (src[`1`] == `'.'` && (src[`2`] == `'/'` \|\| !src[`2`])) {
390	// 2dot END or SEP (".." \| "../<whateverisnext>").
391	src += `2`;
392	if (dst != backtrack_limit) {
393	// We can backtrack the previous part
394	for (--dst; dst != backtrack_limit && dst [-`1`] != `'/'`; --dst) {
395	// Empty.
396	}
397	} else if (!is_absolute_path) {
398	// Failed to backtrack and we can't skip it either. Rewind and copy.
399	src -= `2`;
400	dst ++ = src++;
401	dst ++ = src++;
402	if (*src) {
403	dst ++ = src;
404	}
405	// We can never backtrack over a copied "../" part so set new limit.
406	backtrack_limit = dst;
407	}
408	if (*src) {
409	++src;
410	}
411	parsed = true;
412	}
413	}
414
415	// If not parsed, copy entire part until the next SEP or EOS.
416	if (!parsed) {
417	while (src && src != `'/'`) {
418	dst ++ = src++;
419	}
420	if (*src) {
421	dst ++ = src++;
422	}
423	}
424
425	// Skip consecutive SEP occurrences
426	while (*src == `'/'`) {
427	++src;
428	}
429	}
430
431	// Calculate and check the length of the cleaned path.
432	string::difference_type path_length = dst - path.begin();
433	if (path_length != `0`) {
434	// Remove trailing '/' except if it is root path ("/" ==> path_length := 1)
435	if (path_length > `1` && path [path_length - `1`] == `'/'`) {
436	--path_length;
437	}
438	path.resize(path_length);
439	} else {
440	// The cleaned path is empty; assign "." as per the spec.
441	path.assign(`1`, `'.'`);
442	}
443	return path;
444	}
445
446	void FileSystem::ParseURI(StringPiece remaining, StringPiece* scheme,
447	StringPiece* host, StringPiece* path) const {
448	// 0. Parse scheme
449	// Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
450	// TODO(keveman): Allow "+" and "-" in the scheme.
451	// Keep URI pattern in tensorboard/backend/server.py updated accordingly
452	if (!strings::Scanner (remaining)
453	.One(strings::Scanner::LETTER)
454	.Many(strings::Scanner::LETTER_DIGIT_DOT)
455	.StopCapture()
456	.OneLiteral("://")
457	.GetResult(&remaining, scheme)) {
458	// If there's no scheme, assume the entire string is a path.
459	*scheme = StringPiece ();
460	*host = StringPiece ();
461	*path = remaining;
462	return;
463	}
464
465	// 1. Parse host
466	if (!strings::Scanner (remaining).ScanUntil(`'/'`).GetResult(&remaining, host)) {
467	// No path, so the rest of the URI is the host.
468	*host = remaining;
469	*path = StringPiece ();
470	return;
471	}
472
473	// 2. The rest is the path
474	*path = remaining;
475	}
476
477	string FileSystem::CreateURI(StringPiece scheme, StringPiece host,
478	StringPiece path) const {
479	if (scheme.empty()) {
480	return string (path);
481	}
482	return strings::StrCat(scheme, "://", host, path);
483	}
484
485	std::string FileSystem::DecodeTransaction(const TransactionToken* token) {
486	// TODO(sami): Switch using StrCat when void is supported*
487	if (token) {
488	std::stringstream oss;
489	oss << "Token= " << token->token << ", Owner=" << token->owner;
490	return oss.str();
491	}
492	return "No Transaction";
493	}
494
495	} // namespace tsl
496

Browse the source code of tensorflow/tensorflow/tsl/platform/file_system.cc