1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include "tensorflow/tsl/platform/path.h"
17
18#include <errno.h>
19#include <fcntl.h>
20#include <stdlib.h>
21#include <sys/stat.h>
22#include <sys/types.h>
23#if defined(PLATFORM_WINDOWS)
24#include <windows.h>
25#else
26#include <unistd.h>
27#endif
28
29#include <vector>
30
31#include "absl/algorithm/container.h"
32#include "tensorflow/tsl/platform/logging.h"
33#include "tensorflow/tsl/platform/mutex.h"
34#include "tensorflow/tsl/platform/scanner.h"
35#include "tensorflow/tsl/platform/strcat.h"
36
37namespace tsl {
38namespace io {
39namespace internal {
namespace {

// Separator inserted between components when joining paths, and searched for
// when trimming a common prefix back to a directory boundary.
const char kPathSep[] = "/";

// Copies the environment-provided `path` into `*out`.
//
// Returns false if `path` is null. Returns true otherwise; if `out` is null
// nothing is written and the call only reports that `path` is present.
bool FixBazelEnvPath(const char* path, string* out) {
  if (path == nullptr) return false;
  if (out == nullptr) return true;

  *out = path;

#ifdef PLATFORM_WINDOWS
  // On Windows, paths generated by Bazel always use `/` as the path
  // separator. This prevents normal path management. In the event there are no
  // `\` in the path, we convert all `/` to `\`.
  if (out->find('\\') != string::npos) return true;

  // kPathSep is "/", so the replacement character must be spelled out here;
  // substituting kPathSep[0] would leave the path unchanged.
  for (char& c : *out) {
    if (c == '/') c = '\\';
  }
#endif

  return true;
}

}  // namespace
66
67string JoinPathImpl(std::initializer_list<StringPiece> paths) {
68 string result;
69
70 for (StringPiece path : paths) {
71 if (path.empty()) continue;
72
73 if (result.empty()) {
74 result = string(path);
75 continue;
76 }
77
78 if (IsAbsolutePath(path)) path = path.substr(1);
79
80 if (result[result.size() - 1] == kPathSep[0]) {
81 strings::StrAppend(&result, path);
82 } else {
83 strings::StrAppend(&result, kPathSep, path);
84 }
85 }
86
87 return result;
88}
89
90// Return the parts of the URI, split on the final "/" in the path. If there is
91// no "/" in the path, the first part of the output is the scheme and host, and
92// the second is the path. If the only "/" in the path is the first character,
93// it is included in the first part of the output.
94std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
95 StringPiece scheme, host, path;
96 ParseURI(uri, &scheme, &host, &path);
97
98 auto pos = path.rfind('/');
99#ifdef PLATFORM_WINDOWS
100 if (pos == StringPiece::npos) pos = path.rfind('\\');
101#endif
102 // Handle the case with no '/' in 'path'.
103 if (pos == StringPiece::npos)
104 return std::make_pair(StringPiece(uri.data(), host.end() - uri.begin()),
105 path);
106
107 // Handle the case with a single leading '/' in 'path'.
108 if (pos == 0)
109 return std::make_pair(
110 StringPiece(uri.data(), path.begin() + 1 - uri.begin()),
111 StringPiece(path.data() + 1, path.size() - 1));
112
113 return std::make_pair(
114 StringPiece(uri.data(), path.begin() + pos - uri.begin()),
115 StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
116}
117
118// Return the parts of the basename of path, split on the final ".".
119// If there is no "." in the basename or "." is the final character in the
120// basename, the second value will be empty.
121std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) {
122 path = Basename(path);
123
124 auto pos = path.rfind('.');
125 if (pos == StringPiece::npos)
126 return std::make_pair(path, StringPiece(path.data() + path.size(), 0));
127 return std::make_pair(
128 StringPiece(path.data(), pos),
129 StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
130}
131
132} // namespace internal
133
// Returns true when `path` is non-empty and its first character is '/'.
bool IsAbsolutePath(StringPiece path) {
  if (path.empty()) return false;
  return path.front() == '/';
}
137
138StringPiece Dirname(StringPiece path) {
139 return internal::SplitPath(path).first;
140}
141
142StringPiece Basename(StringPiece path) {
143 return internal::SplitPath(path).second;
144}
145
146StringPiece Extension(StringPiece path) {
147 return internal::SplitBasename(path).second;
148}
149
// Returns a lexically simplified copy of `unclean_path`:
//   - runs of '/' collapse to a single '/';
//   - "." components are removed;
//   - ".." removes the preceding component when there is one to remove, is
//     dropped at the root of an absolute path, and is kept at the front of a
//     relative path;
//   - a trailing '/' is removed unless the result is exactly "/";
//   - an empty result becomes ".".
// The input is scanned like a NUL-terminated string, so scanning stops at the
// first NUL character if one is embedded.
string CleanPath(StringPiece unclean_path) {
  // Character at `index`, or NUL once past the end of the input.
  const auto peek = [&unclean_path](size_t index) -> char {
    return index < unclean_path.size() ? unclean_path[index] : '\0';
  };

  string cleaned;
  cleaned.reserve(unclean_path.size());
  size_t in = 0;

  // An absolute path keeps exactly one leading '/'.
  const bool rooted = peek(0) == '/';
  if (rooted) {
    cleaned.push_back('/');
    ++in;
    while (peek(in) == '/') ++in;
  }
  // Output length below which ".." is never allowed to erase.
  size_t backtrack_floor = cleaned.size();

  while (peek(in) != '\0') {
    bool consumed = false;

    if (peek(in) == '.') {
      const char after_dot = peek(in + 1);
      if (after_dot == '/' || after_dot == '\0') {
        // "." or "./": skip the dot and the separator that follows, if any.
        ++in;
        if (peek(in) != '\0') ++in;
        consumed = true;
      } else if (after_dot == '.' &&
                 (peek(in + 2) == '/' || peek(in + 2) == '\0')) {
        // ".." or "../".
        in += 2;
        if (cleaned.size() != backtrack_floor) {
          // Drop the previously emitted component (it ends with '/').
          size_t keep = cleaned.size() - 1;
          while (keep != backtrack_floor && cleaned[keep - 1] != '/') --keep;
          cleaned.resize(keep);
        } else if (!rooted) {
          // Nothing to erase in a relative path: emit the ".." (plus its
          // separator, if present) and never backtrack across it.
          cleaned.append("..");
          if (peek(in) != '\0') cleaned.push_back(peek(in));
          backtrack_floor = cleaned.size();
        }
        if (peek(in) != '\0') ++in;
        consumed = true;
      }
    }

    // Ordinary component: copy it together with its trailing separator.
    if (!consumed) {
      while (peek(in) != '\0' && peek(in) != '/') {
        cleaned.push_back(peek(in));
        ++in;
      }
      if (peek(in) != '\0') {
        cleaned.push_back(peek(in));
        ++in;
      }
    }

    // Collapse any further separators.
    while (peek(in) == '/') ++in;
  }

  if (cleaned.empty()) {
    // The cleaned path is empty; use "." instead.
    cleaned.assign(1, '.');
    return cleaned;
  }
  // Strip the trailing '/' unless the whole result is the root "/".
  if (cleaned.size() > 1 && cleaned.back() == '/') cleaned.pop_back();
  return cleaned;
}
230
231void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host,
232 StringPiece* path) {
233 // 0. Parse scheme
234 // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
235 // TODO(keveman): Allow "+" and "-" in the scheme.
236 // Keep URI pattern in TensorBoard's `_parse_event_files_spec` updated
237 // accordingly
238 if (!strings::Scanner(uri)
239 .One(strings::Scanner::LETTER)
240 .Many(strings::Scanner::LETTER_DIGIT_DOT)
241 .StopCapture()
242 .OneLiteral("://")
243 .GetResult(&uri, scheme)) {
244 // If there's no scheme, assume the entire string is a path.
245 *scheme = StringPiece(uri.data(), 0);
246 *host = StringPiece(uri.data(), 0);
247 *path = uri;
248 return;
249 }
250
251 // 1. Parse host
252 if (!strings::Scanner(uri).ScanUntil('/').GetResult(&uri, host)) {
253 // No path, so the rest of the URI is the host.
254 *host = uri;
255 *path = StringPiece(); // empty path
256 return;
257 }
258
259 // 2. The rest is the path
260 *path = uri;
261}
262
263string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
264 if (scheme.empty()) {
265 return string(path);
266 }
267 return strings::StrCat(scheme, "://", host, path);
268}
269
270// Returns a unique number every time it is called.
271int64_t UniqueId() {
272 static mutex mu(LINKER_INITIALIZED);
273 static int64_t id = 0;
274 mutex_lock l(mu);
275 return ++id;
276}
277
278string CommonPathPrefix(absl::Span<const string> paths) {
279 if (paths.empty()) return "";
280 size_t min_filename_size =
281 absl::c_min_element(paths, [](const string& a, const string& b) {
282 return a.size() < b.size();
283 })->size();
284 if (min_filename_size == 0) return "";
285
286 size_t common_prefix_size = [&] {
287 for (size_t prefix_size = 0; prefix_size < min_filename_size;
288 prefix_size++) {
289 char c = paths[0][prefix_size];
290 for (int f = 1; f < paths.size(); f++) {
291 if (paths[f][prefix_size] != c) {
292 return prefix_size;
293 }
294 }
295 }
296 return min_filename_size;
297 }();
298
299 size_t rpos = absl::string_view(paths[0])
300 .substr(0, common_prefix_size)
301 .rfind(internal::kPathSep);
302 return rpos == std::string::npos
303 ? ""
304 : std::string(absl::string_view(paths[0]).substr(0, rpos + 1));
305}
306
307string GetTempFilename(const string& extension) {
308#if defined(__ANDROID__)
309 LOG(FATAL) << "GetTempFilename is not implemented in this platform.";
310#elif defined(PLATFORM_WINDOWS)
311 char temp_dir[_MAX_PATH];
312 DWORD retval;
313 retval = GetTempPath(_MAX_PATH, temp_dir);
314 if (retval > _MAX_PATH || retval == 0) {
315 LOG(FATAL) << "Cannot get the directory for temporary files.";
316 }
317
318 char temp_file_name[_MAX_PATH];
319 retval = GetTempFileName(temp_dir, "", UniqueId(), temp_file_name);
320 if (retval > _MAX_PATH || retval == 0) {
321 LOG(FATAL) << "Cannot get a temporary file in: " << temp_dir;
322 }
323
324 string full_tmp_file_name(temp_file_name);
325 full_tmp_file_name.append(extension);
326 return full_tmp_file_name;
327#else
328 for (const char* dir : std::vector<const char*>(
329 {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) {
330 if (!dir || !dir[0]) {
331 continue;
332 }
333 struct stat statbuf;
334 if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) {
335 // UniqueId is added here because mkstemps is not as thread safe as it
336 // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows
337 // the problem.
338 string tmp_filepath;
339 int fd;
340 if (extension.length()) {
341 tmp_filepath = io::JoinPath(
342 dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.",
343 extension));
344 fd = mkstemps(&tmp_filepath[0], extension.length() + 1);
345 } else {
346 tmp_filepath = io::JoinPath(
347 dir,
348 strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX"));
349 fd = mkstemp(&tmp_filepath[0]);
350 }
351 if (fd < 0) {
352 LOG(FATAL) << "Failed to create temp file.";
353 } else {
354 if (close(fd) < 0) {
355 LOG(ERROR) << "close() failed: " << strerror(errno);
356 }
357 return tmp_filepath;
358 }
359 }
360 }
361 LOG(FATAL) << "No temp directory found.";
362 std::abort();
363#endif
364}
365
366bool GetTestUndeclaredOutputsDir(string* dir) {
367 return internal::FixBazelEnvPath(getenv("TEST_UNDECLARED_OUTPUTS_DIR"), dir);
368}
369
370} // namespace io
371} // namespace tsl
372