1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15#include "tensorflow/core/util/memmapped_file_system.h"
16
17#include "tensorflow/core/lib/core/errors.h"
18#include "tensorflow/core/lib/strings/str_util.h"
19#include "tensorflow/core/platform/protobuf.h"
20#include "tensorflow/core/util/memmapped_file_system.pb.h"
21
22namespace tensorflow {
23
24namespace {
25
// Reads sizeof(uint64) bytes starting at `buffer` and assembles them into an
// unsigned 64-bit value, treating buffer[0] as the least significant byte.
// The caller must make sure that many bytes are readable.
uint64 DecodeUint64LittleEndian(const uint8* buffer) {
  uint64 value = 0;
  // Consume the most significant byte first; every later (lower) byte pushes
  // the accumulated bits up by one byte before being merged in.
  for (int index = static_cast<int>(sizeof(uint64)) - 1; index >= 0; --index) {
    value = (value << 8) | static_cast<uint64>(buffer[index]);
  }
  return value;
}
33
34} // namespace
35
36namespace {
37
38class ReadOnlyMemoryRegionFromMemmapped : public ReadOnlyMemoryRegion {
39 public:
40 ReadOnlyMemoryRegionFromMemmapped(const void* data, uint64 length)
41 : data_(data), length_(length) {}
42 ~ReadOnlyMemoryRegionFromMemmapped() override = default;
43 const void* data() override { return data_; }
44 uint64 length() override { return length_; }
45
46 private:
47 const void* const data_;
48 const uint64 length_;
49 // intentionally copyable
50};
51
52class RandomAccessFileFromMemmapped : public RandomAccessFile {
53 public:
54 RandomAccessFileFromMemmapped(const void* data, uint64 length)
55 : data_(data), length_(length) {}
56
57 ~RandomAccessFileFromMemmapped() override = default;
58
59 Status Name(StringPiece* result) const override {
60 return errors::Unimplemented(
61 "RandomAccessFileFromMemmapped does not support Name()");
62 }
63
64 Status Read(uint64 offset, size_t to_read, StringPiece* result,
65 char* scratch) const override {
66 if (offset >= length_) {
67 *result = StringPiece(scratch, 0);
68 return Status(error::OUT_OF_RANGE, "Read after file end");
69 }
70 const uint64 region_left =
71 std::min(length_ - offset, static_cast<uint64>(to_read));
72 *result =
73 StringPiece(reinterpret_cast<const char*>(data_) + offset, region_left);
74 return (region_left == to_read)
75 ? OkStatus()
76 : Status(error::OUT_OF_RANGE, "Read less bytes than requested");
77 }
78
79 private:
80 const void* const data_;
81 const uint64 length_;
82 // intentionally copyable
83};
84
85} // namespace
86
87MemmappedFileSystem::MemmappedFileSystem() {}
88
89Status MemmappedFileSystem::FileExists(const string& fname,
90 TransactionToken* token) {
91 if (!mapped_memory_) {
92 return errors::FailedPrecondition("MemmappedEnv is not initialized");
93 }
94 const auto dir_element = directory_.find(fname);
95 if (dir_element != directory_.end()) {
96 return OkStatus();
97 }
98 return errors::NotFound(fname, " not found");
99}
100
101Status MemmappedFileSystem::NewRandomAccessFile(
102 const string& filename, TransactionToken* token,
103 std::unique_ptr<RandomAccessFile>* result) {
104 if (!mapped_memory_) {
105 return errors::FailedPrecondition("MemmappedEnv is not initialized");
106 }
107 const auto dir_element = directory_.find(filename);
108 if (dir_element == directory_.end()) {
109 return errors::NotFound("Region ", filename, " is not found");
110 }
111 result->reset(new RandomAccessFileFromMemmapped(
112 GetMemoryWithOffset(dir_element->second.offset),
113 dir_element->second.length));
114 return OkStatus();
115}
116
117Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile(
118 const string& filename, TransactionToken* token,
119 std::unique_ptr<ReadOnlyMemoryRegion>* result) {
120 if (!mapped_memory_) {
121 return errors::FailedPrecondition("MemmappedEnv is not initialized");
122 }
123 const auto dir_element = directory_.find(filename);
124 if (dir_element == directory_.end()) {
125 return errors::NotFound("Region ", filename, " is not found");
126 }
127 result->reset(new ReadOnlyMemoryRegionFromMemmapped(
128 GetMemoryWithOffset(dir_element->second.offset),
129 dir_element->second.length));
130 return OkStatus();
131}
132
133Status MemmappedFileSystem::GetFileSize(const string& filename,
134 TransactionToken* token, uint64* size) {
135 if (!mapped_memory_) {
136 return errors::FailedPrecondition("MemmappedEnv is not initialized");
137 }
138 const auto dir_element = directory_.find(filename);
139 if (dir_element == directory_.end()) {
140 return errors::NotFound("Region ", filename, " is not found");
141 }
142 *size = dir_element->second.length;
143 return OkStatus();
144}
145
146Status MemmappedFileSystem::Stat(const string& fname, TransactionToken* token,
147 FileStatistics* stat) {
148 uint64 size;
149 auto status = GetFileSize(fname, token, &size);
150 if (status.ok()) {
151 stat->length = size;
152 }
153 return status;
154}
155
156Status MemmappedFileSystem::NewWritableFile(const string& filename,
157 TransactionToken* token,
158 std::unique_ptr<WritableFile>* wf) {
159 return errors::Unimplemented("memmapped format doesn't support writing");
160}
161
162Status MemmappedFileSystem::NewAppendableFile(
163 const string& filename, TransactionToken* token,
164 std::unique_ptr<WritableFile>* result) {
165 return errors::Unimplemented("memmapped format doesn't support writing");
166}
167
168Status MemmappedFileSystem::GetChildren(const string& filename,
169 TransactionToken* token,
170 std::vector<string>* strings) {
171 return errors::Unimplemented("memmapped format doesn't support GetChildren");
172}
173
174Status MemmappedFileSystem::GetMatchingPaths(const string& pattern,
175 TransactionToken* token,
176 std::vector<string>* results) {
177 return errors::Unimplemented(
178 "memmapped format doesn't support GetMatchingPaths");
179}
180
181Status MemmappedFileSystem::DeleteFile(const string& filename,
182 TransactionToken* token) {
183 return errors::Unimplemented("memmapped format doesn't support DeleteFile");
184}
185
186Status MemmappedFileSystem::CreateDir(const string& dirname,
187 TransactionToken* token) {
188 return errors::Unimplemented("memmapped format doesn't support CreateDir");
189}
190
191Status MemmappedFileSystem::DeleteDir(const string& dirname,
192 TransactionToken* token) {
193 return errors::Unimplemented("memmapped format doesn't support DeleteDir");
194}
195
196Status MemmappedFileSystem::RenameFile(const string& filename_from,
197 const string& filename_to,
198 TransactionToken* token) {
199 return errors::Unimplemented("memmapped format doesn't support RenameFile");
200}
201
202const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const {
203 return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset;
204}
205
// Out-of-class definitions of the two static constexpr character-array
// members. No initializer is given here, so the values presumably come from
// the in-class declarations in the header (not visible in this file).
constexpr const char MemmappedFileSystem::kMemmappedPackagePrefix[];
constexpr const char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
208
209Status MemmappedFileSystem::InitializeFromFile(Env* env,
210 const string& filename) {
211 TF_RETURN_IF_ERROR(
212 env->NewReadOnlyMemoryRegionFromFile(filename, &mapped_memory_));
213 directory_.clear();
214 if (mapped_memory_->length() <= sizeof(uint64)) {
215 return errors::DataLoss("Corrupted memmapped model file: ", filename,
216 " Invalid package size");
217 }
218 const auto memory_start =
219 reinterpret_cast<const uint8*>(mapped_memory_->data());
220 const uint64 directory_offset = DecodeUint64LittleEndian(
221 memory_start + mapped_memory_->length() - sizeof(uint64));
222 if (directory_offset > mapped_memory_->length() - sizeof(uint64)) {
223 return errors::DataLoss("Corrupted memmapped model file: ", filename,
224 " Invalid directory offset");
225 }
226 MemmappedFileSystemDirectory proto_directory;
227 if (!ParseProtoUnlimited(
228 &proto_directory, memory_start + directory_offset,
229 mapped_memory_->length() - directory_offset - sizeof(uint64))) {
230 return errors::DataLoss("Corrupted memmapped model file: ", filename,
231 " Can't parse its internal directory");
232 }
233
234 // Iterating in reverse order to get lengths of elements;
235 uint64 prev_element_offset = directory_offset;
236 for (auto element_iter = proto_directory.element().rbegin();
237 element_iter != proto_directory.element().rend(); ++element_iter) {
238 // Check that the element offset is in the right range.
239 if (element_iter->offset() >= prev_element_offset) {
240 return errors::DataLoss("Corrupted memmapped model file: ", filename,
241 " Invalid offset of internal component");
242 }
243 if (!directory_
244 .insert(std::make_pair(
245 element_iter->name(),
246 FileRegion(element_iter->offset(), element_iter->length())))
247 .second) {
248 return errors::DataLoss("Corrupted memmapped model file: ", filename,
249 " Duplicate name of internal component ",
250 element_iter->name());
251 }
252 prev_element_offset = element_iter->offset();
253 }
254 return OkStatus();
255}
256
257bool MemmappedFileSystem::IsMemmappedPackageFilename(const string& filename) {
258 return absl::StartsWith(filename, kMemmappedPackagePrefix);
259}
260
namespace {
// True for the characters accepted in a region name: ASCII letters, ASCII
// digits, underscore and dot.
bool IsValidRegionChar(char c) {
  if (c == '_' || c == '.') {
    return true;
  }
  const bool is_digit = '0' <= c && c <= '9';
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower || is_digit;
}
}  // namespace
267
268bool MemmappedFileSystem::IsWellFormedMemmappedPackageFilename(
269 const string& filename) {
270 if (!IsMemmappedPackageFilename(filename)) {
271 return false;
272 }
273 for (char c :
274 filename.substr(strlen(kMemmappedPackagePrefix),
275 filename.length() - strlen(kMemmappedPackagePrefix))) {
276 if (!IsValidRegionChar(c)) {
277 return false;
278 }
279 }
280 return true;
281}
282
// Wraps `env` by forwarding it to the EnvWrapper base. No memmapped file
// system is created here; see InitializeFromFile().
MemmappedEnv::MemmappedEnv(Env* env) : EnvWrapper(env) {}
284
285Status MemmappedEnv::GetFileSystemForFile(const string& fname,
286 FileSystem** result) {
287 if (MemmappedFileSystem::IsMemmappedPackageFilename(fname)) {
288 if (!memmapped_file_system_) {
289 return errors::FailedPrecondition(
290 "MemmappedEnv is not initialized from a file.");
291 }
292 *result = memmapped_file_system_.get();
293 return OkStatus();
294 }
295 return EnvWrapper::GetFileSystemForFile(fname, result);
296}
297
298Status MemmappedEnv::GetRegisteredFileSystemSchemes(
299 std::vector<string>* schemes) {
300 const auto status = EnvWrapper::GetRegisteredFileSystemSchemes(schemes);
301 if (status.ok()) {
302 schemes->emplace_back(MemmappedFileSystem::kMemmappedPackagePrefix);
303 }
304 return status;
305}
306
307Status MemmappedEnv::InitializeFromFile(const string& package_filename) {
308 std::unique_ptr<MemmappedFileSystem> file_system_ptr(new MemmappedFileSystem);
309 const auto status =
310 file_system_ptr->InitializeFromFile(target(), package_filename);
311 if (status.ok()) {
312 memmapped_file_system_ = std::move(file_system_ptr);
313 }
314 return status;
315}
316
317} // namespace tensorflow
318