1 | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_TSL_PLATFORM_RAM_FILE_SYSTEM_H_ |
17 | #define TENSORFLOW_TSL_PLATFORM_RAM_FILE_SYSTEM_H_ |
18 | |
19 | // Implementation of an in-memory TF filesystem for simple prototyping (e.g. |
20 | // via Colab). The TPU TF server does not have local filesystem access, which |
21 | // makes it difficult to provide Colab tutorials: users must have GCS access |
22 | // and sign-in in order to try out an example. |
23 | // |
24 | // Files are implemented on top of std::string. Directories, as with GCS or S3, |
25 | // are implicit based on the existence of child files. Multiple files may |
26 | // reference a single FS location, though no thread-safety guarantees are |
27 | // provided. |
28 | |
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/tsl/platform/env.h"
#include "tensorflow/tsl/platform/file_system.h"
#include "tensorflow/tsl/platform/mutex.h"
#include "tensorflow/tsl/platform/stringpiece.h"
#include "tensorflow/tsl/platform/types.h"
36 | |
37 | #ifdef PLATFORM_WINDOWS |
38 | #undef DeleteFile |
39 | #undef CopyFile |
40 | #undef TranslateName |
41 | #endif |
42 | |
43 | namespace tsl { |
44 | |
45 | class RamRandomAccessFile : public RandomAccessFile, public WritableFile { |
46 | public: |
47 | RamRandomAccessFile(std::string name, std::shared_ptr<std::string> cord) |
48 | : name_(name), data_(cord) {} |
49 | ~RamRandomAccessFile() override {} |
50 | |
51 | Status Name(StringPiece* result) const override { |
52 | *result = name_; |
53 | return OkStatus(); |
54 | } |
55 | |
56 | Status Read(uint64 offset, size_t n, StringPiece* result, |
57 | char* scratch) const override { |
58 | if (offset >= data_->size()) { |
59 | return errors::OutOfRange("" ); |
60 | } |
61 | |
62 | uint64 left = std::min(static_cast<uint64>(n), data_->size() - offset); |
63 | auto start = data_->begin() + offset; |
64 | auto end = data_->begin() + offset + left; |
65 | |
66 | std::copy(start, end, scratch); |
67 | *result = StringPiece(scratch, left); |
68 | |
69 | // In case of a partial read, we must still fill `result`, but also return |
70 | // OutOfRange. |
71 | if (left < n) { |
72 | return errors::OutOfRange("" ); |
73 | } |
74 | return OkStatus(); |
75 | } |
76 | |
77 | Status Append(StringPiece data) override { |
78 | data_->append(data.data(), data.size()); |
79 | return OkStatus(); |
80 | } |
81 | |
82 | #if defined(TF_CORD_SUPPORT) |
83 | Status Append(const absl::Cord& cord) override { |
84 | data_->append(cord.char_begin(), cord.char_end()); |
85 | return OkStatus(); |
86 | } |
87 | #endif |
88 | |
89 | Status Close() override { return OkStatus(); } |
90 | Status Flush() override { return OkStatus(); } |
91 | Status Sync() override { return OkStatus(); } |
92 | |
93 | Status Tell(int64_t* position) override { |
94 | *position = -1; |
95 | return errors::Unimplemented("This filesystem does not support Tell()" ); |
96 | } |
97 | |
98 | private: |
99 | TF_DISALLOW_COPY_AND_ASSIGN(RamRandomAccessFile); |
100 | std::string name_; |
101 | std::shared_ptr<std::string> data_; |
102 | }; |
103 | |
104 | class RamFileSystem : public FileSystem { |
105 | public: |
106 | TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; |
107 | |
108 | Status NewRandomAccessFile( |
109 | const std::string& fname_, TransactionToken* token, |
110 | std::unique_ptr<RandomAccessFile>* result) override { |
111 | mutex_lock m(mu_); |
112 | auto fname = StripRamFsPrefix(fname_); |
113 | |
114 | if (fs_.find(fname) == fs_.end()) { |
115 | return errors::NotFound("" ); |
116 | } |
117 | if (fs_[fname] == nullptr) { |
118 | return errors::InvalidArgument(fname_, " is a directory." ); |
119 | } |
120 | *result = std::unique_ptr<RandomAccessFile>( |
121 | new RamRandomAccessFile(fname, fs_[fname])); |
122 | return OkStatus(); |
123 | } |
124 | |
125 | Status NewWritableFile(const std::string& fname_, TransactionToken* token, |
126 | std::unique_ptr<WritableFile>* result) override { |
127 | mutex_lock m(mu_); |
128 | auto fname = StripRamFsPrefix(fname_); |
129 | |
130 | if (fs_.find(fname) == fs_.end()) { |
131 | fs_[fname] = std::make_shared<std::string>(); |
132 | } |
133 | if (fs_[fname] == nullptr) { |
134 | return errors::InvalidArgument(fname_, " is a directory." ); |
135 | } |
136 | *result = std::unique_ptr<WritableFile>( |
137 | new RamRandomAccessFile(fname, fs_[fname])); |
138 | return OkStatus(); |
139 | } |
140 | |
141 | Status NewAppendableFile(const std::string& fname_, TransactionToken* token, |
142 | std::unique_ptr<WritableFile>* result) override { |
143 | mutex_lock m(mu_); |
144 | auto fname = StripRamFsPrefix(fname_); |
145 | |
146 | if (fs_.find(fname) == fs_.end()) { |
147 | fs_[fname] = std::make_shared<std::string>(); |
148 | } |
149 | if (fs_[fname] == nullptr) { |
150 | return errors::InvalidArgument(fname_, " is a directory." ); |
151 | } |
152 | *result = std::unique_ptr<WritableFile>( |
153 | new RamRandomAccessFile(fname, fs_[fname])); |
154 | return OkStatus(); |
155 | } |
156 | |
157 | Status NewReadOnlyMemoryRegionFromFile( |
158 | const std::string& fname, TransactionToken* token, |
159 | std::unique_ptr<ReadOnlyMemoryRegion>* result) override { |
160 | return errors::Unimplemented("" ); |
161 | } |
162 | |
163 | Status FileExists(const std::string& fname_, |
164 | TransactionToken* token) override { |
165 | FileStatistics stat; |
166 | auto fname = StripRamFsPrefix(fname_); |
167 | |
168 | return Stat(fname, token, &stat); |
169 | } |
170 | |
171 | Status GetChildren(const std::string& dir_, TransactionToken* token, |
172 | std::vector<std::string>* result) override { |
173 | mutex_lock m(mu_); |
174 | auto dir = StripRamFsPrefix(dir_); |
175 | |
176 | auto it = fs_.lower_bound(dir); |
177 | while (it != fs_.end() && StartsWith(it->first, dir)) { |
178 | auto filename = StripPrefix(StripPrefix(it->first, dir), "/" ); |
179 | // It is not either (a) the parent directory itself or (b) a subdirectory |
180 | if (!filename.empty() && filename.find("/" ) == std::string::npos) { |
181 | result->push_back(filename); |
182 | } |
183 | ++it; |
184 | } |
185 | |
186 | return OkStatus(); |
187 | } |
188 | |
189 | Status GetMatchingPaths(const std::string& pattern_, TransactionToken* token, |
190 | std::vector<std::string>* results) override { |
191 | mutex_lock m(mu_); |
192 | auto pattern = StripRamFsPrefix(pattern_); |
193 | |
194 | Env* env = Env::Default(); |
195 | for (auto it = fs_.begin(); it != fs_.end(); ++it) { |
196 | if (env->MatchPath(it->first, pattern)) { |
197 | results->push_back("ram://" + it->first); |
198 | } |
199 | } |
200 | return OkStatus(); |
201 | } |
202 | |
203 | Status Stat(const std::string& fname_, TransactionToken* token, |
204 | FileStatistics* stat) override { |
205 | mutex_lock m(mu_); |
206 | auto fname = StripRamFsPrefix(fname_); |
207 | |
208 | auto it = fs_.lower_bound(fname); |
209 | if (it == fs_.end() || !StartsWith(it->first, fname)) { |
210 | return errors::NotFound("" ); |
211 | } |
212 | |
213 | if (it->first == fname && it->second != nullptr) { |
214 | stat->is_directory = false; |
215 | stat->length = fs_[fname]->size(); |
216 | stat->mtime_nsec = 0; |
217 | return OkStatus(); |
218 | } |
219 | |
220 | stat->is_directory = true; |
221 | stat->length = 0; |
222 | stat->mtime_nsec = 0; |
223 | return OkStatus(); |
224 | } |
225 | |
226 | Status DeleteFile(const std::string& fname_, |
227 | TransactionToken* token) override { |
228 | mutex_lock m(mu_); |
229 | auto fname = StripRamFsPrefix(fname_); |
230 | |
231 | if (fs_.find(fname) != fs_.end()) { |
232 | fs_.erase(fname); |
233 | return OkStatus(); |
234 | } |
235 | |
236 | return errors::NotFound("" ); |
237 | } |
238 | |
239 | Status CreateDir(const std::string& dirname_, |
240 | TransactionToken* token) override { |
241 | mutex_lock m(mu_); |
242 | auto dirname = StripRamFsPrefix(dirname_); |
243 | |
244 | auto it = fs_.find(dirname); |
245 | if (it != fs_.end() && it->second != nullptr) { |
246 | return errors::AlreadyExists( |
247 | "cannot create directory with same name as an existing file" ); |
248 | } |
249 | |
250 | fs_[dirname] = nullptr; |
251 | return OkStatus(); |
252 | } |
253 | |
254 | Status RecursivelyCreateDir(const std::string& dirname_, |
255 | TransactionToken* token) override { |
256 | auto dirname = StripRamFsPrefix(dirname_); |
257 | |
258 | std::vector<std::string> dirs = StrSplit(dirname, "/" ); |
259 | Status last_status; |
260 | std::string dir = dirs[0]; |
261 | last_status = CreateDir(dir, token); |
262 | |
263 | for (int i = 1; i < dirs.size(); ++i) { |
264 | dir = dir + "/" + dirs[i]; |
265 | last_status = CreateDir(dir, token); |
266 | } |
267 | return last_status; |
268 | } |
269 | |
270 | Status DeleteDir(const std::string& dirname_, |
271 | TransactionToken* token) override { |
272 | mutex_lock m(mu_); |
273 | auto dirname = StripRamFsPrefix(dirname_); |
274 | |
275 | auto it = fs_.find(dirname); |
276 | if (it == fs_.end()) { |
277 | return errors::NotFound("" ); |
278 | } |
279 | if (it->second != nullptr) { |
280 | return errors::InvalidArgument("Not a directory" ); |
281 | } |
282 | fs_.erase(dirname); |
283 | |
284 | return OkStatus(); |
285 | } |
286 | |
287 | Status GetFileSize(const std::string& fname_, TransactionToken* token, |
288 | uint64* file_size) override { |
289 | mutex_lock m(mu_); |
290 | auto fname = StripRamFsPrefix(fname_); |
291 | |
292 | if (fs_.find(fname) != fs_.end()) { |
293 | if (fs_[fname] == nullptr) { |
294 | return errors::InvalidArgument("Not a file" ); |
295 | } |
296 | *file_size = fs_[fname]->size(); |
297 | return OkStatus(); |
298 | } |
299 | return errors::NotFound("" ); |
300 | } |
301 | |
302 | Status RenameFile(const std::string& src_, const std::string& target_, |
303 | TransactionToken* token) override { |
304 | mutex_lock m(mu_); |
305 | auto src = StripRamFsPrefix(src_); |
306 | auto target = StripRamFsPrefix(target_); |
307 | |
308 | if (fs_.find(src) != fs_.end()) { |
309 | fs_[target] = fs_[src]; |
310 | fs_.erase(fs_.find(src)); |
311 | return OkStatus(); |
312 | } |
313 | return errors::NotFound("" ); |
314 | } |
315 | |
316 | RamFileSystem() {} |
317 | ~RamFileSystem() override {} |
318 | |
319 | private: |
320 | mutex mu_; |
321 | std::map<std::string, std::shared_ptr<std::string>> fs_; |
322 | |
323 | std::vector<std::string> StrSplit(std::string s, std::string delim) { |
324 | std::vector<std::string> ret; |
325 | size_t curr_pos = 0; |
326 | while ((curr_pos = s.find(delim)) != std::string::npos) { |
327 | ret.push_back(s.substr(0, curr_pos)); |
328 | s.erase(0, curr_pos + delim.size()); |
329 | } |
330 | ret.push_back(s); |
331 | return ret; |
332 | } |
333 | |
334 | bool StartsWith(std::string s, std::string prefix) { |
335 | return s.find(prefix) == 0; |
336 | } |
337 | |
338 | string StripPrefix(std::string s, std::string prefix) { |
339 | if (s.find(prefix) == 0) { |
340 | return s.erase(0, prefix.size()); |
341 | } |
342 | return s; |
343 | } |
344 | |
345 | string StripRamFsPrefix(std::string name) { |
346 | std::string s = StripPrefix(name, "ram://" ); |
347 | if (*(s.rbegin()) == '/') { |
348 | s.pop_back(); |
349 | } |
350 | return s; |
351 | } |
352 | }; |
353 | |
354 | } // namespace tsl |
355 | |
356 | #endif // TENSORFLOW_TSL_PLATFORM_RAM_FILE_SYSTEM_H_ |
357 | |