1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #include "tensorflow/core/util/memmapped_file_system.h" |
16 | |
17 | #include "tensorflow/core/lib/core/errors.h" |
18 | #include "tensorflow/core/lib/strings/str_util.h" |
19 | #include "tensorflow/core/platform/protobuf.h" |
20 | #include "tensorflow/core/util/memmapped_file_system.pb.h" |
21 | |
22 | namespace tensorflow { |
23 | |
24 | namespace { |
25 | |
// Assembles a uint64 from the sizeof(uint64) bytes starting at `buffer`,
// treating buffer[0] as the least significant byte. The caller must supply at
// least sizeof(uint64) readable bytes; nothing is checked here.
uint64 DecodeUint64LittleEndian(const uint8* buffer) {
  uint64 decoded = 0;
  // Consume the bytes from most significant to least significant, shifting the
  // accumulator up by one byte each step so buffer[0] ends in the low 8 bits.
  for (size_t remaining = sizeof(uint64); remaining > 0; --remaining) {
    decoded = (decoded << 8) | static_cast<uint64>(buffer[remaining - 1]);
  }
  return decoded;
}
33 | |
34 | } // namespace |
35 | |
36 | namespace { |
37 | |
38 | class ReadOnlyMemoryRegionFromMemmapped : public ReadOnlyMemoryRegion { |
39 | public: |
40 | ReadOnlyMemoryRegionFromMemmapped(const void* data, uint64 length) |
41 | : data_(data), length_(length) {} |
42 | ~ReadOnlyMemoryRegionFromMemmapped() override = default; |
43 | const void* data() override { return data_; } |
44 | uint64 length() override { return length_; } |
45 | |
46 | private: |
47 | const void* const data_; |
48 | const uint64 length_; |
49 | // intentionally copyable |
50 | }; |
51 | |
52 | class RandomAccessFileFromMemmapped : public RandomAccessFile { |
53 | public: |
54 | RandomAccessFileFromMemmapped(const void* data, uint64 length) |
55 | : data_(data), length_(length) {} |
56 | |
57 | ~RandomAccessFileFromMemmapped() override = default; |
58 | |
59 | Status Name(StringPiece* result) const override { |
60 | return errors::Unimplemented( |
61 | "RandomAccessFileFromMemmapped does not support Name()" ); |
62 | } |
63 | |
64 | Status Read(uint64 offset, size_t to_read, StringPiece* result, |
65 | char* scratch) const override { |
66 | if (offset >= length_) { |
67 | *result = StringPiece(scratch, 0); |
68 | return Status(error::OUT_OF_RANGE, "Read after file end" ); |
69 | } |
70 | const uint64 region_left = |
71 | std::min(length_ - offset, static_cast<uint64>(to_read)); |
72 | *result = |
73 | StringPiece(reinterpret_cast<const char*>(data_) + offset, region_left); |
74 | return (region_left == to_read) |
75 | ? OkStatus() |
76 | : Status(error::OUT_OF_RANGE, "Read less bytes than requested" ); |
77 | } |
78 | |
79 | private: |
80 | const void* const data_; |
81 | const uint64 length_; |
82 | // intentionally copyable |
83 | }; |
84 | |
85 | } // namespace |
86 | |
87 | MemmappedFileSystem::MemmappedFileSystem() {} |
88 | |
89 | Status MemmappedFileSystem::FileExists(const string& fname, |
90 | TransactionToken* token) { |
91 | if (!mapped_memory_) { |
92 | return errors::FailedPrecondition("MemmappedEnv is not initialized" ); |
93 | } |
94 | const auto dir_element = directory_.find(fname); |
95 | if (dir_element != directory_.end()) { |
96 | return OkStatus(); |
97 | } |
98 | return errors::NotFound(fname, " not found" ); |
99 | } |
100 | |
101 | Status MemmappedFileSystem::NewRandomAccessFile( |
102 | const string& filename, TransactionToken* token, |
103 | std::unique_ptr<RandomAccessFile>* result) { |
104 | if (!mapped_memory_) { |
105 | return errors::FailedPrecondition("MemmappedEnv is not initialized" ); |
106 | } |
107 | const auto dir_element = directory_.find(filename); |
108 | if (dir_element == directory_.end()) { |
109 | return errors::NotFound("Region " , filename, " is not found" ); |
110 | } |
111 | result->reset(new RandomAccessFileFromMemmapped( |
112 | GetMemoryWithOffset(dir_element->second.offset), |
113 | dir_element->second.length)); |
114 | return OkStatus(); |
115 | } |
116 | |
117 | Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile( |
118 | const string& filename, TransactionToken* token, |
119 | std::unique_ptr<ReadOnlyMemoryRegion>* result) { |
120 | if (!mapped_memory_) { |
121 | return errors::FailedPrecondition("MemmappedEnv is not initialized" ); |
122 | } |
123 | const auto dir_element = directory_.find(filename); |
124 | if (dir_element == directory_.end()) { |
125 | return errors::NotFound("Region " , filename, " is not found" ); |
126 | } |
127 | result->reset(new ReadOnlyMemoryRegionFromMemmapped( |
128 | GetMemoryWithOffset(dir_element->second.offset), |
129 | dir_element->second.length)); |
130 | return OkStatus(); |
131 | } |
132 | |
133 | Status MemmappedFileSystem::GetFileSize(const string& filename, |
134 | TransactionToken* token, uint64* size) { |
135 | if (!mapped_memory_) { |
136 | return errors::FailedPrecondition("MemmappedEnv is not initialized" ); |
137 | } |
138 | const auto dir_element = directory_.find(filename); |
139 | if (dir_element == directory_.end()) { |
140 | return errors::NotFound("Region " , filename, " is not found" ); |
141 | } |
142 | *size = dir_element->second.length; |
143 | return OkStatus(); |
144 | } |
145 | |
146 | Status MemmappedFileSystem::Stat(const string& fname, TransactionToken* token, |
147 | FileStatistics* stat) { |
148 | uint64 size; |
149 | auto status = GetFileSize(fname, token, &size); |
150 | if (status.ok()) { |
151 | stat->length = size; |
152 | } |
153 | return status; |
154 | } |
155 | |
156 | Status MemmappedFileSystem::NewWritableFile(const string& filename, |
157 | TransactionToken* token, |
158 | std::unique_ptr<WritableFile>* wf) { |
159 | return errors::Unimplemented("memmapped format doesn't support writing" ); |
160 | } |
161 | |
162 | Status MemmappedFileSystem::NewAppendableFile( |
163 | const string& filename, TransactionToken* token, |
164 | std::unique_ptr<WritableFile>* result) { |
165 | return errors::Unimplemented("memmapped format doesn't support writing" ); |
166 | } |
167 | |
168 | Status MemmappedFileSystem::GetChildren(const string& filename, |
169 | TransactionToken* token, |
170 | std::vector<string>* strings) { |
171 | return errors::Unimplemented("memmapped format doesn't support GetChildren" ); |
172 | } |
173 | |
174 | Status MemmappedFileSystem::GetMatchingPaths(const string& pattern, |
175 | TransactionToken* token, |
176 | std::vector<string>* results) { |
177 | return errors::Unimplemented( |
178 | "memmapped format doesn't support GetMatchingPaths" ); |
179 | } |
180 | |
181 | Status MemmappedFileSystem::DeleteFile(const string& filename, |
182 | TransactionToken* token) { |
183 | return errors::Unimplemented("memmapped format doesn't support DeleteFile" ); |
184 | } |
185 | |
186 | Status MemmappedFileSystem::CreateDir(const string& dirname, |
187 | TransactionToken* token) { |
188 | return errors::Unimplemented("memmapped format doesn't support CreateDir" ); |
189 | } |
190 | |
191 | Status MemmappedFileSystem::DeleteDir(const string& dirname, |
192 | TransactionToken* token) { |
193 | return errors::Unimplemented("memmapped format doesn't support DeleteDir" ); |
194 | } |
195 | |
196 | Status MemmappedFileSystem::RenameFile(const string& filename_from, |
197 | const string& filename_to, |
198 | TransactionToken* token) { |
199 | return errors::Unimplemented("memmapped format doesn't support RenameFile" ); |
200 | } |
201 | |
202 | const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const { |
203 | return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset; |
204 | } |
205 | |
// Namespace-scope definitions for the constexpr static character-array
// constants of MemmappedFileSystem (presumably declared with their values in
// memmapped_file_system.h -- confirm there). Before C++17 such a definition is
// required whenever the member is odr-used; from C++17 on constexpr static
// members are implicitly inline, which makes these lines redundant but
// harmless.
constexpr const char MemmappedFileSystem::kMemmappedPackagePrefix[];
constexpr const char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
208 | |
209 | Status MemmappedFileSystem::InitializeFromFile(Env* env, |
210 | const string& filename) { |
211 | TF_RETURN_IF_ERROR( |
212 | env->NewReadOnlyMemoryRegionFromFile(filename, &mapped_memory_)); |
213 | directory_.clear(); |
214 | if (mapped_memory_->length() <= sizeof(uint64)) { |
215 | return errors::DataLoss("Corrupted memmapped model file: " , filename, |
216 | " Invalid package size" ); |
217 | } |
218 | const auto memory_start = |
219 | reinterpret_cast<const uint8*>(mapped_memory_->data()); |
220 | const uint64 directory_offset = DecodeUint64LittleEndian( |
221 | memory_start + mapped_memory_->length() - sizeof(uint64)); |
222 | if (directory_offset > mapped_memory_->length() - sizeof(uint64)) { |
223 | return errors::DataLoss("Corrupted memmapped model file: " , filename, |
224 | " Invalid directory offset" ); |
225 | } |
226 | MemmappedFileSystemDirectory proto_directory; |
227 | if (!ParseProtoUnlimited( |
228 | &proto_directory, memory_start + directory_offset, |
229 | mapped_memory_->length() - directory_offset - sizeof(uint64))) { |
230 | return errors::DataLoss("Corrupted memmapped model file: " , filename, |
231 | " Can't parse its internal directory" ); |
232 | } |
233 | |
234 | // Iterating in reverse order to get lengths of elements; |
235 | uint64 prev_element_offset = directory_offset; |
236 | for (auto element_iter = proto_directory.element().rbegin(); |
237 | element_iter != proto_directory.element().rend(); ++element_iter) { |
238 | // Check that the element offset is in the right range. |
239 | if (element_iter->offset() >= prev_element_offset) { |
240 | return errors::DataLoss("Corrupted memmapped model file: " , filename, |
241 | " Invalid offset of internal component" ); |
242 | } |
243 | if (!directory_ |
244 | .insert(std::make_pair( |
245 | element_iter->name(), |
246 | FileRegion(element_iter->offset(), element_iter->length()))) |
247 | .second) { |
248 | return errors::DataLoss("Corrupted memmapped model file: " , filename, |
249 | " Duplicate name of internal component " , |
250 | element_iter->name()); |
251 | } |
252 | prev_element_offset = element_iter->offset(); |
253 | } |
254 | return OkStatus(); |
255 | } |
256 | |
257 | bool MemmappedFileSystem::IsMemmappedPackageFilename(const string& filename) { |
258 | return absl::StartsWith(filename, kMemmappedPackagePrefix); |
259 | } |
260 | |
261 | namespace { |
// Returns true for the characters accepted in a region name: ASCII letters,
// ASCII digits, underscore and dot.
bool IsValidRegionChar(char c) {
  if (c == '_' || c == '.') {
    return true;
  }
  const bool is_digit = '0' <= c && c <= '9';
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower || is_digit;
}
266 | } // namespace |
267 | |
268 | bool MemmappedFileSystem::IsWellFormedMemmappedPackageFilename( |
269 | const string& filename) { |
270 | if (!IsMemmappedPackageFilename(filename)) { |
271 | return false; |
272 | } |
273 | for (char c : |
274 | filename.substr(strlen(kMemmappedPackagePrefix), |
275 | filename.length() - strlen(kMemmappedPackagePrefix))) { |
276 | if (!IsValidRegionChar(c)) { |
277 | return false; |
278 | } |
279 | } |
280 | return true; |
281 | } |
282 | |
// Forwards `env` to the EnvWrapper base and does nothing else; the memmapped
// file system is only set later by a successful InitializeFromFile().
MemmappedEnv::MemmappedEnv(Env* env) : EnvWrapper(env) {}
284 | |
285 | Status MemmappedEnv::GetFileSystemForFile(const string& fname, |
286 | FileSystem** result) { |
287 | if (MemmappedFileSystem::IsMemmappedPackageFilename(fname)) { |
288 | if (!memmapped_file_system_) { |
289 | return errors::FailedPrecondition( |
290 | "MemmappedEnv is not initialized from a file." ); |
291 | } |
292 | *result = memmapped_file_system_.get(); |
293 | return OkStatus(); |
294 | } |
295 | return EnvWrapper::GetFileSystemForFile(fname, result); |
296 | } |
297 | |
298 | Status MemmappedEnv::GetRegisteredFileSystemSchemes( |
299 | std::vector<string>* schemes) { |
300 | const auto status = EnvWrapper::GetRegisteredFileSystemSchemes(schemes); |
301 | if (status.ok()) { |
302 | schemes->emplace_back(MemmappedFileSystem::kMemmappedPackagePrefix); |
303 | } |
304 | return status; |
305 | } |
306 | |
307 | Status MemmappedEnv::InitializeFromFile(const string& package_filename) { |
308 | std::unique_ptr<MemmappedFileSystem> file_system_ptr(new MemmappedFileSystem); |
309 | const auto status = |
310 | file_system_ptr->InitializeFromFile(target(), package_filename); |
311 | if (status.ok()) { |
312 | memmapped_file_system_ = std::move(file_system_ptr); |
313 | } |
314 | return status; |
315 | } |
316 | |
317 | } // namespace tensorflow |
318 | |