1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/tsl/platform/path.h" |
17 | |
18 | #include <errno.h> |
19 | #include <fcntl.h> |
20 | #include <stdlib.h> |
21 | #include <sys/stat.h> |
22 | #include <sys/types.h> |
23 | #if defined(PLATFORM_WINDOWS) |
24 | #include <windows.h> |
25 | #else |
26 | #include <unistd.h> |
27 | #endif |
28 | |
29 | #include <vector> |
30 | |
31 | #include "absl/algorithm/container.h" |
32 | #include "tensorflow/tsl/platform/logging.h" |
33 | #include "tensorflow/tsl/platform/mutex.h" |
34 | #include "tensorflow/tsl/platform/scanner.h" |
35 | #include "tensorflow/tsl/platform/strcat.h" |
36 | |
37 | namespace tsl { |
38 | namespace io { |
39 | namespace internal { |
40 | namespace { |
41 | |
// Separator placed between path components by the helpers in this file.
constexpr char kPathSep[] = "/";
43 | |
44 | bool FixBazelEnvPath(const char* path, string* out) { |
45 | if (path == nullptr) return false; |
46 | if (out == nullptr) return true; |
47 | |
48 | *out = path; |
49 | |
50 | #ifdef PLATFORM_WINDOWS |
51 | // On Windows, paths generated by Bazel are always use `/` as the path |
52 | // separator. This prevents normal path management. In the event there are no |
53 | // `\` in the path, we convert all `/` to `\`. |
54 | if (out->find('\\') != string::npos) return path; |
55 | |
56 | for (size_t pos = out->find('/'); pos != string::npos; |
57 | pos = out->find('/', pos + 1)) { |
58 | (*out)[pos] = kPathSep[0]; |
59 | } |
60 | #endif |
61 | |
62 | return true; |
63 | } |
64 | |
65 | } // namespace |
66 | |
67 | string JoinPathImpl(std::initializer_list<StringPiece> paths) { |
68 | string result; |
69 | |
70 | for (StringPiece path : paths) { |
71 | if (path.empty()) continue; |
72 | |
73 | if (result.empty()) { |
74 | result = string(path); |
75 | continue; |
76 | } |
77 | |
78 | if (IsAbsolutePath(path)) path = path.substr(1); |
79 | |
80 | if (result[result.size() - 1] == kPathSep[0]) { |
81 | strings::StrAppend(&result, path); |
82 | } else { |
83 | strings::StrAppend(&result, kPathSep, path); |
84 | } |
85 | } |
86 | |
87 | return result; |
88 | } |
89 | |
90 | // Return the parts of the URI, split on the final "/" in the path. If there is |
91 | // no "/" in the path, the first part of the output is the scheme and host, and |
92 | // the second is the path. If the only "/" in the path is the first character, |
93 | // it is included in the first part of the output. |
94 | std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) { |
95 | StringPiece scheme, host, path; |
96 | ParseURI(uri, &scheme, &host, &path); |
97 | |
98 | auto pos = path.rfind('/'); |
99 | #ifdef PLATFORM_WINDOWS |
100 | if (pos == StringPiece::npos) pos = path.rfind('\\'); |
101 | #endif |
102 | // Handle the case with no '/' in 'path'. |
103 | if (pos == StringPiece::npos) |
104 | return std::make_pair(StringPiece(uri.data(), host.end() - uri.begin()), |
105 | path); |
106 | |
107 | // Handle the case with a single leading '/' in 'path'. |
108 | if (pos == 0) |
109 | return std::make_pair( |
110 | StringPiece(uri.data(), path.begin() + 1 - uri.begin()), |
111 | StringPiece(path.data() + 1, path.size() - 1)); |
112 | |
113 | return std::make_pair( |
114 | StringPiece(uri.data(), path.begin() + pos - uri.begin()), |
115 | StringPiece(path.data() + pos + 1, path.size() - (pos + 1))); |
116 | } |
117 | |
118 | // Return the parts of the basename of path, split on the final ".". |
119 | // If there is no "." in the basename or "." is the final character in the |
120 | // basename, the second value will be empty. |
121 | std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) { |
122 | path = Basename(path); |
123 | |
124 | auto pos = path.rfind('.'); |
125 | if (pos == StringPiece::npos) |
126 | return std::make_pair(path, StringPiece(path.data() + path.size(), 0)); |
127 | return std::make_pair( |
128 | StringPiece(path.data(), pos), |
129 | StringPiece(path.data() + pos + 1, path.size() - (pos + 1))); |
130 | } |
131 | |
132 | } // namespace internal |
133 | |
134 | bool IsAbsolutePath(StringPiece path) { |
135 | return !path.empty() && path[0] == '/'; |
136 | } |
137 | |
138 | StringPiece Dirname(StringPiece path) { |
139 | return internal::SplitPath(path).first; |
140 | } |
141 | |
142 | StringPiece Basename(StringPiece path) { |
143 | return internal::SplitPath(path).second; |
144 | } |
145 | |
146 | StringPiece Extension(StringPiece path) { |
147 | return internal::SplitBasename(path).second; |
148 | } |
149 | |
150 | string CleanPath(StringPiece unclean_path) { |
151 | string path(unclean_path); |
152 | const char* src = path.c_str(); |
153 | string::iterator dst = path.begin(); |
154 | |
155 | // Check for absolute path and determine initial backtrack limit. |
156 | const bool is_absolute_path = *src == '/'; |
157 | if (is_absolute_path) { |
158 | *dst++ = *src++; |
159 | while (*src == '/') ++src; |
160 | } |
161 | string::const_iterator backtrack_limit = dst; |
162 | |
163 | // Process all parts |
164 | while (*src) { |
165 | bool parsed = false; |
166 | |
167 | if (src[0] == '.') { |
168 | // 1dot ".<whateverisnext>", check for END or SEP. |
169 | if (src[1] == '/' || !src[1]) { |
170 | if (*++src) { |
171 | ++src; |
172 | } |
173 | parsed = true; |
174 | } else if (src[1] == '.' && (src[2] == '/' || !src[2])) { |
175 | // 2dot END or SEP (".." | "../<whateverisnext>"). |
176 | src += 2; |
177 | if (dst != backtrack_limit) { |
178 | // We can backtrack the previous part |
179 | for (--dst; dst != backtrack_limit && dst[-1] != '/'; --dst) { |
180 | // Empty. |
181 | } |
182 | } else if (!is_absolute_path) { |
183 | // Failed to backtrack and we can't skip it either. Rewind and copy. |
184 | src -= 2; |
185 | *dst++ = *src++; |
186 | *dst++ = *src++; |
187 | if (*src) { |
188 | *dst++ = *src; |
189 | } |
190 | // We can never backtrack over a copied "../" part so set new limit. |
191 | backtrack_limit = dst; |
192 | } |
193 | if (*src) { |
194 | ++src; |
195 | } |
196 | parsed = true; |
197 | } |
198 | } |
199 | |
200 | // If not parsed, copy entire part until the next SEP or EOS. |
201 | if (!parsed) { |
202 | while (*src && *src != '/') { |
203 | *dst++ = *src++; |
204 | } |
205 | if (*src) { |
206 | *dst++ = *src++; |
207 | } |
208 | } |
209 | |
210 | // Skip consecutive SEP occurrences |
211 | while (*src == '/') { |
212 | ++src; |
213 | } |
214 | } |
215 | |
216 | // Calculate and check the length of the cleaned path. |
217 | string::difference_type path_length = dst - path.begin(); |
218 | if (path_length != 0) { |
219 | // Remove trailing '/' except if it is root path ("/" ==> path_length := 1) |
220 | if (path_length > 1 && path[path_length - 1] == '/') { |
221 | --path_length; |
222 | } |
223 | path.resize(path_length); |
224 | } else { |
225 | // The cleaned path is empty; assign "." as per the spec. |
226 | path.assign(1, '.'); |
227 | } |
228 | return path; |
229 | } |
230 | |
231 | void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host, |
232 | StringPiece* path) { |
233 | // 0. Parse scheme |
234 | // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]* |
235 | // TODO(keveman): Allow "+" and "-" in the scheme. |
236 | // Keep URI pattern in TensorBoard's `_parse_event_files_spec` updated |
237 | // accordingly |
238 | if (!strings::Scanner(uri) |
239 | .One(strings::Scanner::LETTER) |
240 | .Many(strings::Scanner::LETTER_DIGIT_DOT) |
241 | .StopCapture() |
242 | .OneLiteral("://" ) |
243 | .GetResult(&uri, scheme)) { |
244 | // If there's no scheme, assume the entire string is a path. |
245 | *scheme = StringPiece(uri.data(), 0); |
246 | *host = StringPiece(uri.data(), 0); |
247 | *path = uri; |
248 | return; |
249 | } |
250 | |
251 | // 1. Parse host |
252 | if (!strings::Scanner(uri).ScanUntil('/').GetResult(&uri, host)) { |
253 | // No path, so the rest of the URI is the host. |
254 | *host = uri; |
255 | *path = StringPiece(); // empty path |
256 | return; |
257 | } |
258 | |
259 | // 2. The rest is the path |
260 | *path = uri; |
261 | } |
262 | |
263 | string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) { |
264 | if (scheme.empty()) { |
265 | return string(path); |
266 | } |
267 | return strings::StrCat(scheme, "://" , host, path); |
268 | } |
269 | |
270 | // Returns a unique number every time it is called. |
271 | int64_t UniqueId() { |
272 | static mutex mu(LINKER_INITIALIZED); |
273 | static int64_t id = 0; |
274 | mutex_lock l(mu); |
275 | return ++id; |
276 | } |
277 | |
278 | string CommonPathPrefix(absl::Span<const string> paths) { |
279 | if (paths.empty()) return "" ; |
280 | size_t min_filename_size = |
281 | absl::c_min_element(paths, [](const string& a, const string& b) { |
282 | return a.size() < b.size(); |
283 | })->size(); |
284 | if (min_filename_size == 0) return "" ; |
285 | |
286 | size_t common_prefix_size = [&] { |
287 | for (size_t prefix_size = 0; prefix_size < min_filename_size; |
288 | prefix_size++) { |
289 | char c = paths[0][prefix_size]; |
290 | for (int f = 1; f < paths.size(); f++) { |
291 | if (paths[f][prefix_size] != c) { |
292 | return prefix_size; |
293 | } |
294 | } |
295 | } |
296 | return min_filename_size; |
297 | }(); |
298 | |
299 | size_t rpos = absl::string_view(paths[0]) |
300 | .substr(0, common_prefix_size) |
301 | .rfind(internal::kPathSep); |
302 | return rpos == std::string::npos |
303 | ? "" |
304 | : std::string(absl::string_view(paths[0]).substr(0, rpos + 1)); |
305 | } |
306 | |
307 | string GetTempFilename(const string& extension) { |
308 | #if defined(__ANDROID__) |
309 | LOG(FATAL) << "GetTempFilename is not implemented in this platform." ; |
310 | #elif defined(PLATFORM_WINDOWS) |
311 | char temp_dir[_MAX_PATH]; |
312 | DWORD retval; |
313 | retval = GetTempPath(_MAX_PATH, temp_dir); |
314 | if (retval > _MAX_PATH || retval == 0) { |
315 | LOG(FATAL) << "Cannot get the directory for temporary files." ; |
316 | } |
317 | |
318 | char temp_file_name[_MAX_PATH]; |
319 | retval = GetTempFileName(temp_dir, "" , UniqueId(), temp_file_name); |
320 | if (retval > _MAX_PATH || retval == 0) { |
321 | LOG(FATAL) << "Cannot get a temporary file in: " << temp_dir; |
322 | } |
323 | |
324 | string full_tmp_file_name(temp_file_name); |
325 | full_tmp_file_name.append(extension); |
326 | return full_tmp_file_name; |
327 | #else |
328 | for (const char* dir : std::vector<const char*>( |
329 | {getenv("TEST_TMPDIR" ), getenv("TMPDIR" ), getenv("TMP" ), "/tmp" })) { |
330 | if (!dir || !dir[0]) { |
331 | continue; |
332 | } |
333 | struct stat statbuf; |
334 | if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) { |
335 | // UniqueId is added here because mkstemps is not as thread safe as it |
336 | // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows |
337 | // the problem. |
338 | string tmp_filepath; |
339 | int fd; |
340 | if (extension.length()) { |
341 | tmp_filepath = io::JoinPath( |
342 | dir, strings::StrCat("tmp_file_tensorflow_" , UniqueId(), "_XXXXXX." , |
343 | extension)); |
344 | fd = mkstemps(&tmp_filepath[0], extension.length() + 1); |
345 | } else { |
346 | tmp_filepath = io::JoinPath( |
347 | dir, |
348 | strings::StrCat("tmp_file_tensorflow_" , UniqueId(), "_XXXXXX" )); |
349 | fd = mkstemp(&tmp_filepath[0]); |
350 | } |
351 | if (fd < 0) { |
352 | LOG(FATAL) << "Failed to create temp file." ; |
353 | } else { |
354 | if (close(fd) < 0) { |
355 | LOG(ERROR) << "close() failed: " << strerror(errno); |
356 | } |
357 | return tmp_filepath; |
358 | } |
359 | } |
360 | } |
361 | LOG(FATAL) << "No temp directory found." ; |
362 | std::abort(); |
363 | #endif |
364 | } |
365 | |
366 | bool GetTestUndeclaredOutputsDir(string* dir) { |
367 | return internal::FixBazelEnvPath(getenv("TEST_UNDECLARED_OUTPUTS_DIR" ), dir); |
368 | } |
369 | |
370 | } // namespace io |
371 | } // namespace tsl |
372 | |