1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | |
5 | #include <dirent.h> |
6 | #include <fcntl.h> |
7 | #include <sys/mman.h> |
8 | #ifndef __Fuchsia__ |
9 | #include <sys/resource.h> |
10 | #endif |
11 | #include <sys/stat.h> |
12 | #include <sys/time.h> |
13 | #include <sys/types.h> |
14 | #include <unistd.h> |
15 | |
16 | #include <atomic> |
17 | #include <cerrno> |
18 | #include <cstddef> |
19 | #include <cstdint> |
20 | #include <cstdio> |
21 | #include <cstdlib> |
22 | #include <cstring> |
23 | #include <limits> |
24 | #include <queue> |
25 | #include <set> |
26 | #include <string> |
27 | #include <thread> |
28 | #include <type_traits> |
29 | #include <utility> |
30 | |
31 | #include "leveldb/env.h" |
32 | #include "leveldb/slice.h" |
33 | #include "leveldb/status.h" |
34 | #include "port/port.h" |
35 | #include "port/thread_annotations.h" |
36 | #include "util/env_posix_test_helper.h" |
37 | #include "util/posix_logger.h" |
38 | |
39 | namespace leveldb { |
40 | |
41 | namespace { |
42 | |
// Maximum number of read-only file descriptors that may be kept open, or a
// negative value while the limit has not been determined yet.
//
// Set by EnvPosixTestHelper::SetReadOnlyFDLimit() and MaxOpenFiles().
int g_open_read_only_file_limit = -1;

// Up to 1000 mmap regions for 64-bit binaries; none for 32-bit.
constexpr const int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 1000 : 0;

// Maximum number of mmap regions used for read-only files.
//
// Can be set using EnvPosixTestHelper::SetReadOnlyMMapLimit().
int g_mmap_limit = kDefaultMmapLimit;

// Common flags defined for all posix open operations
#if defined(HAVE_O_CLOEXEC)
constexpr const int kOpenBaseFlags = O_CLOEXEC;
#else
constexpr const int kOpenBaseFlags = 0;
#endif  // defined(HAVE_O_CLOEXEC)

// Size, in bytes, of the write buffer embedded in each PosixWritableFile.
constexpr const size_t kWritableFileBufferSize = 65536;
60 | |
61 | Status PosixError(const std::string& context, int error_number) { |
62 | if (error_number == ENOENT) { |
63 | return Status::NotFound(context, std::strerror(error_number)); |
64 | } else { |
65 | return Status::IOError(context, std::strerror(error_number)); |
66 | } |
67 | } |
68 | |
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not run out of file descriptors or virtual memory, or run into
// kernel performance problems for very large databases.
//
// The limiter is a simple counter of remaining permits: Acquire() takes one
// permit if any is left, Release() gives one back.
class Limiter {
 public:
  // Limit maximum number of resources to |max_acquires|, which must be
  // non-negative.
  Limiter(int max_acquires)
      :
#if !defined(NDEBUG)
        max_acquires_(max_acquires),
#endif  // !defined(NDEBUG)
        acquires_allowed_(max_acquires) {
    assert(max_acquires >= 0);
  }

  // Not copyable: the counter must be shared by all users of the resource.
  Limiter(const Limiter&) = delete;
  Limiter& operator=(const Limiter&) = delete;

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Optimistically take a permit; the previous value tells us whether one
    // was actually available.
    int old_acquires_allowed =
        acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);

    if (old_acquires_allowed > 0) return true;

    // No permit was available, so undo the decrement above.
    int pre_increment_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused arguments when NDEBUG is defined.
    (void)pre_increment_acquires_allowed;
    // If the check below fails, Release() was called more times than acquire.
    assert(pre_increment_acquires_allowed < max_acquires_);

    return false;
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() {
    int old_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused arguments when NDEBUG is defined.
    (void)old_acquires_allowed;
    // If the check below fails, Release() was called more times than acquire.
    assert(old_acquires_allowed < max_acquires_);
  }

 private:
#if !defined(NDEBUG)
  // Catches an excessive number of Release() calls.
  const int max_acquires_;
#endif  // !defined(NDEBUG)

  // The number of available resources.
  //
  // This is a counter and is not tied to the invariants of any other class, so
  // it can be operated on safely using std::memory_order_relaxed.
  std::atomic<int> acquires_allowed_;
};
131 | |
132 | // Implements sequential read access in a file using read(). |
133 | // |
134 | // Instances of this class are thread-friendly but not thread-safe, as required |
135 | // by the SequentialFile API. |
136 | class PosixSequentialFile final : public SequentialFile { |
137 | public: |
138 | PosixSequentialFile(std::string filename, int fd) |
139 | : fd_(fd), filename_(std::move(filename)) {} |
140 | ~PosixSequentialFile() override { close(fd_); } |
141 | |
142 | Status Read(size_t n, Slice* result, char* scratch) override { |
143 | Status status; |
144 | while (true) { |
145 | ::ssize_t read_size = ::read(fd_, scratch, n); |
146 | if (read_size < 0) { // Read error. |
147 | if (errno == EINTR) { |
148 | continue; // Retry |
149 | } |
150 | status = PosixError(filename_, errno); |
151 | break; |
152 | } |
153 | *result = Slice(scratch, read_size); |
154 | break; |
155 | } |
156 | return status; |
157 | } |
158 | |
159 | Status Skip(uint64_t n) override { |
160 | if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) { |
161 | return PosixError(filename_, errno); |
162 | } |
163 | return Status::OK(); |
164 | } |
165 | |
166 | private: |
167 | const int fd_; |
168 | const std::string filename_; |
169 | }; |
170 | |
171 | // Implements random read access in a file using pread(). |
172 | // |
173 | // Instances of this class are thread-safe, as required by the RandomAccessFile |
174 | // API. Instances are immutable and Read() only calls thread-safe library |
175 | // functions. |
176 | class PosixRandomAccessFile final : public RandomAccessFile { |
177 | public: |
178 | // The new instance takes ownership of |fd|. |fd_limiter| must outlive this |
179 | // instance, and will be used to determine if . |
180 | PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter) |
181 | : has_permanent_fd_(fd_limiter->Acquire()), |
182 | fd_(has_permanent_fd_ ? fd : -1), |
183 | fd_limiter_(fd_limiter), |
184 | filename_(std::move(filename)) { |
185 | if (!has_permanent_fd_) { |
186 | assert(fd_ == -1); |
187 | ::close(fd); // The file will be opened on every read. |
188 | } |
189 | } |
190 | |
191 | ~PosixRandomAccessFile() override { |
192 | if (has_permanent_fd_) { |
193 | assert(fd_ != -1); |
194 | ::close(fd_); |
195 | fd_limiter_->Release(); |
196 | } |
197 | } |
198 | |
199 | Status Read(uint64_t offset, size_t n, Slice* result, |
200 | char* scratch) const override { |
201 | int fd = fd_; |
202 | if (!has_permanent_fd_) { |
203 | fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags); |
204 | if (fd < 0) { |
205 | return PosixError(filename_, errno); |
206 | } |
207 | } |
208 | |
209 | assert(fd != -1); |
210 | |
211 | Status status; |
212 | ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset)); |
213 | *result = Slice(scratch, (read_size < 0) ? 0 : read_size); |
214 | if (read_size < 0) { |
215 | // An error: return a non-ok status. |
216 | status = PosixError(filename_, errno); |
217 | } |
218 | if (!has_permanent_fd_) { |
219 | // Close the temporary file descriptor opened earlier. |
220 | assert(fd != fd_); |
221 | ::close(fd); |
222 | } |
223 | return status; |
224 | } |
225 | |
226 | private: |
227 | const bool has_permanent_fd_; // If false, the file is opened on every read. |
228 | const int fd_; // -1 if has_permanent_fd_ is false. |
229 | Limiter* const fd_limiter_; |
230 | const std::string filename_; |
231 | }; |
232 | |
233 | // Implements random read access in a file using mmap(). |
234 | // |
235 | // Instances of this class are thread-safe, as required by the RandomAccessFile |
236 | // API. Instances are immutable and Read() only calls thread-safe library |
237 | // functions. |
238 | class PosixMmapReadableFile final : public RandomAccessFile { |
239 | public: |
240 | // mmap_base[0, length-1] points to the memory-mapped contents of the file. It |
241 | // must be the result of a successful call to mmap(). This instances takes |
242 | // over the ownership of the region. |
243 | // |
244 | // |mmap_limiter| must outlive this instance. The caller must have already |
245 | // acquired the right to use one mmap region, which will be released when this |
246 | // instance is destroyed. |
247 | PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length, |
248 | Limiter* mmap_limiter) |
249 | : mmap_base_(mmap_base), |
250 | length_(length), |
251 | mmap_limiter_(mmap_limiter), |
252 | filename_(std::move(filename)) {} |
253 | |
254 | ~PosixMmapReadableFile() override { |
255 | ::munmap(static_cast<void*>(mmap_base_), length_); |
256 | mmap_limiter_->Release(); |
257 | } |
258 | |
259 | Status Read(uint64_t offset, size_t n, Slice* result, |
260 | char* scratch) const override { |
261 | if (offset + n > length_) { |
262 | *result = Slice(); |
263 | return PosixError(filename_, EINVAL); |
264 | } |
265 | |
266 | *result = Slice(mmap_base_ + offset, n); |
267 | return Status::OK(); |
268 | } |
269 | |
270 | private: |
271 | char* const mmap_base_; |
272 | const size_t length_; |
273 | Limiter* const mmap_limiter_; |
274 | const std::string filename_; |
275 | }; |
276 | |
277 | class PosixWritableFile final : public WritableFile { |
278 | public: |
279 | PosixWritableFile(std::string filename, int fd) |
280 | : pos_(0), |
281 | fd_(fd), |
282 | is_manifest_(IsManifest(filename)), |
283 | filename_(std::move(filename)), |
284 | dirname_(Dirname(filename_)) {} |
285 | |
286 | ~PosixWritableFile() override { |
287 | if (fd_ >= 0) { |
288 | // Ignoring any potential errors |
289 | Close(); |
290 | } |
291 | } |
292 | |
293 | Status Append(const Slice& data) override { |
294 | size_t write_size = data.size(); |
295 | const char* write_data = data.data(); |
296 | |
297 | // Fit as much as possible into buffer. |
298 | size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_); |
299 | std::memcpy(buf_ + pos_, write_data, copy_size); |
300 | write_data += copy_size; |
301 | write_size -= copy_size; |
302 | pos_ += copy_size; |
303 | if (write_size == 0) { |
304 | return Status::OK(); |
305 | } |
306 | |
307 | // Can't fit in buffer, so need to do at least one write. |
308 | Status status = FlushBuffer(); |
309 | if (!status.ok()) { |
310 | return status; |
311 | } |
312 | |
313 | // Small writes go to buffer, large writes are written directly. |
314 | if (write_size < kWritableFileBufferSize) { |
315 | std::memcpy(buf_, write_data, write_size); |
316 | pos_ = write_size; |
317 | return Status::OK(); |
318 | } |
319 | return WriteUnbuffered(write_data, write_size); |
320 | } |
321 | |
322 | Status Close() override { |
323 | Status status = FlushBuffer(); |
324 | const int close_result = ::close(fd_); |
325 | if (close_result < 0 && status.ok()) { |
326 | status = PosixError(filename_, errno); |
327 | } |
328 | fd_ = -1; |
329 | return status; |
330 | } |
331 | |
332 | Status Flush() override { return FlushBuffer(); } |
333 | |
334 | Status Sync() override { |
335 | // Ensure new files referred to by the manifest are in the filesystem. |
336 | // |
337 | // This needs to happen before the manifest file is flushed to disk, to |
338 | // avoid crashing in a state where the manifest refers to files that are not |
339 | // yet on disk. |
340 | Status status = SyncDirIfManifest(); |
341 | if (!status.ok()) { |
342 | return status; |
343 | } |
344 | |
345 | status = FlushBuffer(); |
346 | if (!status.ok()) { |
347 | return status; |
348 | } |
349 | |
350 | return SyncFd(fd_, filename_); |
351 | } |
352 | |
353 | private: |
354 | Status FlushBuffer() { |
355 | Status status = WriteUnbuffered(buf_, pos_); |
356 | pos_ = 0; |
357 | return status; |
358 | } |
359 | |
360 | Status WriteUnbuffered(const char* data, size_t size) { |
361 | while (size > 0) { |
362 | ssize_t write_result = ::write(fd_, data, size); |
363 | if (write_result < 0) { |
364 | if (errno == EINTR) { |
365 | continue; // Retry |
366 | } |
367 | return PosixError(filename_, errno); |
368 | } |
369 | data += write_result; |
370 | size -= write_result; |
371 | } |
372 | return Status::OK(); |
373 | } |
374 | |
375 | Status SyncDirIfManifest() { |
376 | Status status; |
377 | if (!is_manifest_) { |
378 | return status; |
379 | } |
380 | |
381 | int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags); |
382 | if (fd < 0) { |
383 | status = PosixError(dirname_, errno); |
384 | } else { |
385 | status = SyncFd(fd, dirname_); |
386 | ::close(fd); |
387 | } |
388 | return status; |
389 | } |
390 | |
391 | // Ensures that all the caches associated with the given file descriptor's |
392 | // data are flushed all the way to durable media, and can withstand power |
393 | // failures. |
394 | // |
395 | // The path argument is only used to populate the description string in the |
396 | // returned Status if an error occurs. |
397 | static Status SyncFd(int fd, const std::string& fd_path) { |
398 | #if HAVE_FULLFSYNC |
399 | // On macOS and iOS, fsync() doesn't guarantee durability past power |
400 | // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some |
401 | // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to |
402 | // fsync(). |
403 | if (::fcntl(fd, F_FULLFSYNC) == 0) { |
404 | return Status::OK(); |
405 | } |
406 | #endif // HAVE_FULLFSYNC |
407 | |
408 | #if HAVE_FDATASYNC |
409 | bool sync_success = ::fdatasync(fd) == 0; |
410 | #else |
411 | bool sync_success = ::fsync(fd) == 0; |
412 | #endif // HAVE_FDATASYNC |
413 | |
414 | if (sync_success) { |
415 | return Status::OK(); |
416 | } |
417 | return PosixError(fd_path, errno); |
418 | } |
419 | |
420 | // Returns the directory name in a path pointing to a file. |
421 | // |
422 | // Returns "." if the path does not contain any directory separator. |
423 | static std::string Dirname(const std::string& filename) { |
424 | std::string::size_type separator_pos = filename.rfind('/'); |
425 | if (separator_pos == std::string::npos) { |
426 | return std::string("." ); |
427 | } |
428 | // The filename component should not contain a path separator. If it does, |
429 | // the splitting was done incorrectly. |
430 | assert(filename.find('/', separator_pos + 1) == std::string::npos); |
431 | |
432 | return filename.substr(0, separator_pos); |
433 | } |
434 | |
435 | // Extracts the file name from a path pointing to a file. |
436 | // |
437 | // The returned Slice points to |filename|'s data buffer, so it is only valid |
438 | // while |filename| is alive and unchanged. |
439 | static Slice Basename(const std::string& filename) { |
440 | std::string::size_type separator_pos = filename.rfind('/'); |
441 | if (separator_pos == std::string::npos) { |
442 | return Slice(filename); |
443 | } |
444 | // The filename component should not contain a path separator. If it does, |
445 | // the splitting was done incorrectly. |
446 | assert(filename.find('/', separator_pos + 1) == std::string::npos); |
447 | |
448 | return Slice(filename.data() + separator_pos + 1, |
449 | filename.length() - separator_pos - 1); |
450 | } |
451 | |
452 | // True if the given file is a manifest file. |
453 | static bool IsManifest(const std::string& filename) { |
454 | return Basename(filename).starts_with("MANIFEST" ); |
455 | } |
456 | |
457 | // buf_[0, pos_ - 1] contains data to be written to fd_. |
458 | char buf_[kWritableFileBufferSize]; |
459 | size_t pos_; |
460 | int fd_; |
461 | |
462 | const bool is_manifest_; // True if the file's name starts with MANIFEST. |
463 | const std::string filename_; |
464 | const std::string dirname_; // The directory of filename_. |
465 | }; |
466 | |
// Acquires (|lock| is true) or releases (|lock| is false) a write lock that
// covers the whole file referred to by |fd|, using a non-blocking
// fcntl(F_SETLK) request.
//
// Returns the fcntl() result. errno is cleared before the request is issued.
int LockOrUnlock(int fd, bool lock) {
  errno = 0;

  struct ::flock whole_file_request;
  std::memset(&whole_file_request, 0, sizeof(whole_file_request));
  if (lock) {
    whole_file_request.l_type = F_WRLCK;
  } else {
    whole_file_request.l_type = F_UNLCK;
  }
  // Start at offset 0 with length 0: lock/unlock entire file.
  whole_file_request.l_whence = SEEK_SET;
  whole_file_request.l_start = 0;
  whole_file_request.l_len = 0;

  const int fcntl_result = ::fcntl(fd, F_SETLK, &whole_file_request);
  return fcntl_result;
}
477 | |
478 | // Instances are thread-safe because they are immutable. |
479 | class PosixFileLock : public FileLock { |
480 | public: |
481 | PosixFileLock(int fd, std::string filename) |
482 | : fd_(fd), filename_(std::move(filename)) {} |
483 | |
484 | int fd() const { return fd_; } |
485 | const std::string& filename() const { return filename_; } |
486 | |
487 | private: |
488 | const int fd_; |
489 | const std::string filename_; |
490 | }; |
491 | |
492 | // Tracks the files locked by PosixEnv::LockFile(). |
493 | // |
494 | // We maintain a separate set instead of relying on fcntl(F_SETLK) because |
495 | // fcntl(F_SETLK) does not provide any protection against multiple uses from the |
496 | // same process. |
497 | // |
498 | // Instances are thread-safe because all member data is guarded by a mutex. |
499 | class PosixLockTable { |
500 | public: |
501 | bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) { |
502 | mu_.Lock(); |
503 | bool succeeded = locked_files_.insert(fname).second; |
504 | mu_.Unlock(); |
505 | return succeeded; |
506 | } |
507 | void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) { |
508 | mu_.Lock(); |
509 | locked_files_.erase(fname); |
510 | mu_.Unlock(); |
511 | } |
512 | |
513 | private: |
514 | port::Mutex mu_; |
515 | std::set<std::string> locked_files_ GUARDED_BY(mu_); |
516 | }; |
517 | |
518 | class PosixEnv : public Env { |
519 | public: |
520 | PosixEnv(); |
521 | ~PosixEnv() override { |
522 | static const char msg[] = |
523 | "PosixEnv singleton destroyed. Unsupported behavior!\n" ; |
524 | std::fwrite(msg, 1, sizeof(msg), stderr); |
525 | std::abort(); |
526 | } |
527 | |
528 | Status NewSequentialFile(const std::string& filename, |
529 | SequentialFile** result) override { |
530 | int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); |
531 | if (fd < 0) { |
532 | *result = nullptr; |
533 | return PosixError(filename, errno); |
534 | } |
535 | |
536 | *result = new PosixSequentialFile(filename, fd); |
537 | return Status::OK(); |
538 | } |
539 | |
540 | Status NewRandomAccessFile(const std::string& filename, |
541 | RandomAccessFile** result) override { |
542 | *result = nullptr; |
543 | int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags); |
544 | if (fd < 0) { |
545 | return PosixError(filename, errno); |
546 | } |
547 | |
548 | if (!mmap_limiter_.Acquire()) { |
549 | *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_); |
550 | return Status::OK(); |
551 | } |
552 | |
553 | uint64_t file_size; |
554 | Status status = GetFileSize(filename, &file_size); |
555 | if (status.ok()) { |
556 | void* mmap_base = |
557 | ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); |
558 | if (mmap_base != MAP_FAILED) { |
559 | *result = new PosixMmapReadableFile(filename, |
560 | reinterpret_cast<char*>(mmap_base), |
561 | file_size, &mmap_limiter_); |
562 | } else { |
563 | status = PosixError(filename, errno); |
564 | } |
565 | } |
566 | ::close(fd); |
567 | if (!status.ok()) { |
568 | mmap_limiter_.Release(); |
569 | } |
570 | return status; |
571 | } |
572 | |
573 | Status NewWritableFile(const std::string& filename, |
574 | WritableFile** result) override { |
575 | int fd = ::open(filename.c_str(), |
576 | O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
577 | if (fd < 0) { |
578 | *result = nullptr; |
579 | return PosixError(filename, errno); |
580 | } |
581 | |
582 | *result = new PosixWritableFile(filename, fd); |
583 | return Status::OK(); |
584 | } |
585 | |
586 | Status NewAppendableFile(const std::string& filename, |
587 | WritableFile** result) override { |
588 | int fd = ::open(filename.c_str(), |
589 | O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
590 | if (fd < 0) { |
591 | *result = nullptr; |
592 | return PosixError(filename, errno); |
593 | } |
594 | |
595 | *result = new PosixWritableFile(filename, fd); |
596 | return Status::OK(); |
597 | } |
598 | |
599 | bool FileExists(const std::string& filename) override { |
600 | return ::access(filename.c_str(), F_OK) == 0; |
601 | } |
602 | |
603 | Status GetChildren(const std::string& directory_path, |
604 | std::vector<std::string>* result) override { |
605 | result->clear(); |
606 | ::DIR* dir = ::opendir(directory_path.c_str()); |
607 | if (dir == nullptr) { |
608 | return PosixError(directory_path, errno); |
609 | } |
610 | struct ::dirent* entry; |
611 | while ((entry = ::readdir(dir)) != nullptr) { |
612 | result->emplace_back(entry->d_name); |
613 | } |
614 | ::closedir(dir); |
615 | return Status::OK(); |
616 | } |
617 | |
618 | Status RemoveFile(const std::string& filename) override { |
619 | if (::unlink(filename.c_str()) != 0) { |
620 | return PosixError(filename, errno); |
621 | } |
622 | return Status::OK(); |
623 | } |
624 | |
625 | Status CreateDir(const std::string& dirname) override { |
626 | if (::mkdir(dirname.c_str(), 0755) != 0) { |
627 | return PosixError(dirname, errno); |
628 | } |
629 | return Status::OK(); |
630 | } |
631 | |
632 | Status RemoveDir(const std::string& dirname) override { |
633 | if (::rmdir(dirname.c_str()) != 0) { |
634 | return PosixError(dirname, errno); |
635 | } |
636 | return Status::OK(); |
637 | } |
638 | |
639 | Status GetFileSize(const std::string& filename, uint64_t* size) override { |
640 | struct ::stat file_stat; |
641 | if (::stat(filename.c_str(), &file_stat) != 0) { |
642 | *size = 0; |
643 | return PosixError(filename, errno); |
644 | } |
645 | *size = file_stat.st_size; |
646 | return Status::OK(); |
647 | } |
648 | |
649 | Status RenameFile(const std::string& from, const std::string& to) override { |
650 | if (std::rename(from.c_str(), to.c_str()) != 0) { |
651 | return PosixError(from, errno); |
652 | } |
653 | return Status::OK(); |
654 | } |
655 | |
656 | Status LockFile(const std::string& filename, FileLock** lock) override { |
657 | *lock = nullptr; |
658 | |
659 | int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644); |
660 | if (fd < 0) { |
661 | return PosixError(filename, errno); |
662 | } |
663 | |
664 | if (!locks_.Insert(filename)) { |
665 | ::close(fd); |
666 | return Status::IOError("lock " + filename, "already held by process" ); |
667 | } |
668 | |
669 | if (LockOrUnlock(fd, true) == -1) { |
670 | int lock_errno = errno; |
671 | ::close(fd); |
672 | locks_.Remove(filename); |
673 | return PosixError("lock " + filename, lock_errno); |
674 | } |
675 | |
676 | *lock = new PosixFileLock(fd, filename); |
677 | return Status::OK(); |
678 | } |
679 | |
680 | Status UnlockFile(FileLock* lock) override { |
681 | PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock); |
682 | if (LockOrUnlock(posix_file_lock->fd(), false) == -1) { |
683 | return PosixError("unlock " + posix_file_lock->filename(), errno); |
684 | } |
685 | locks_.Remove(posix_file_lock->filename()); |
686 | ::close(posix_file_lock->fd()); |
687 | delete posix_file_lock; |
688 | return Status::OK(); |
689 | } |
690 | |
691 | void Schedule(void (*background_work_function)(void* background_work_arg), |
692 | void* background_work_arg) override; |
693 | |
694 | void StartThread(void (*thread_main)(void* thread_main_arg), |
695 | void* thread_main_arg) override { |
696 | std::thread new_thread(thread_main, thread_main_arg); |
697 | new_thread.detach(); |
698 | } |
699 | |
700 | Status GetTestDirectory(std::string* result) override { |
701 | const char* env = std::getenv("TEST_TMPDIR" ); |
702 | if (env && env[0] != '\0') { |
703 | *result = env; |
704 | } else { |
705 | char buf[100]; |
706 | std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d" , |
707 | static_cast<int>(::geteuid())); |
708 | *result = buf; |
709 | } |
710 | |
711 | // The CreateDir status is ignored because the directory may already exist. |
712 | CreateDir(*result); |
713 | |
714 | return Status::OK(); |
715 | } |
716 | |
717 | Status NewLogger(const std::string& filename, Logger** result) override { |
718 | int fd = ::open(filename.c_str(), |
719 | O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644); |
720 | if (fd < 0) { |
721 | *result = nullptr; |
722 | return PosixError(filename, errno); |
723 | } |
724 | |
725 | std::FILE* fp = ::fdopen(fd, "w" ); |
726 | if (fp == nullptr) { |
727 | ::close(fd); |
728 | *result = nullptr; |
729 | return PosixError(filename, errno); |
730 | } else { |
731 | *result = new PosixLogger(fp); |
732 | return Status::OK(); |
733 | } |
734 | } |
735 | |
736 | uint64_t NowMicros() override { |
737 | static constexpr uint64_t kUsecondsPerSecond = 1000000; |
738 | struct ::timeval tv; |
739 | ::gettimeofday(&tv, nullptr); |
740 | return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec; |
741 | } |
742 | |
743 | void SleepForMicroseconds(int micros) override { |
744 | std::this_thread::sleep_for(std::chrono::microseconds(micros)); |
745 | } |
746 | |
747 | private: |
748 | void BackgroundThreadMain(); |
749 | |
750 | static void BackgroundThreadEntryPoint(PosixEnv* env) { |
751 | env->BackgroundThreadMain(); |
752 | } |
753 | |
754 | // Stores the work item data in a Schedule() call. |
755 | // |
756 | // Instances are constructed on the thread calling Schedule() and used on the |
757 | // background thread. |
758 | // |
759 | // This structure is thread-safe because it is immutable. |
760 | struct BackgroundWorkItem { |
761 | explicit BackgroundWorkItem(void (*function)(void* arg), void* arg) |
762 | : function(function), arg(arg) {} |
763 | |
764 | void (*const function)(void*); |
765 | void* const arg; |
766 | }; |
767 | |
768 | port::Mutex background_work_mutex_; |
769 | port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_); |
770 | bool started_background_thread_ GUARDED_BY(background_work_mutex_); |
771 | |
772 | std::queue<BackgroundWorkItem> background_work_queue_ |
773 | GUARDED_BY(background_work_mutex_); |
774 | |
775 | PosixLockTable locks_; // Thread-safe. |
776 | Limiter mmap_limiter_; // Thread-safe. |
777 | Limiter fd_limiter_; // Thread-safe. |
778 | }; |
779 | |
// Return the maximum number of concurrent mmaps.
//
// This is the current value of g_mmap_limit, so it reflects any override made
// through EnvPosixTestHelper::SetReadOnlyMMapLimit() before the call.
int MaxMmaps() { return g_mmap_limit; }
782 | |
783 | // Return the maximum number of read-only files to keep open. |
784 | int MaxOpenFiles() { |
785 | if (g_open_read_only_file_limit >= 0) { |
786 | return g_open_read_only_file_limit; |
787 | } |
788 | #ifdef __Fuchsia__ |
789 | // Fuchsia doesn't implement getrlimit. |
790 | g_open_read_only_file_limit = 50; |
791 | #else |
792 | struct ::rlimit rlim; |
793 | if (::getrlimit(RLIMIT_NOFILE, &rlim)) { |
794 | // getrlimit failed, fallback to hard-coded default. |
795 | g_open_read_only_file_limit = 50; |
796 | } else if (rlim.rlim_cur == RLIM_INFINITY) { |
797 | g_open_read_only_file_limit = std::numeric_limits<int>::max(); |
798 | } else { |
799 | // Allow use of 20% of available file descriptors for read-only files. |
800 | g_open_read_only_file_limit = rlim.rlim_cur / 5; |
801 | } |
802 | #endif |
803 | return g_open_read_only_file_limit; |
804 | } |
805 | |
806 | } // namespace |
807 | |
808 | PosixEnv::PosixEnv() |
809 | : background_work_cv_(&background_work_mutex_), |
810 | started_background_thread_(false), |
811 | mmap_limiter_(MaxMmaps()), |
812 | fd_limiter_(MaxOpenFiles()) {} |
813 | |
814 | void PosixEnv::Schedule( |
815 | void (*background_work_function)(void* background_work_arg), |
816 | void* background_work_arg) { |
817 | background_work_mutex_.Lock(); |
818 | |
819 | // Start the background thread, if we haven't done so already. |
820 | if (!started_background_thread_) { |
821 | started_background_thread_ = true; |
822 | std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this); |
823 | background_thread.detach(); |
824 | } |
825 | |
826 | // If the queue is empty, the background thread may be waiting for work. |
827 | if (background_work_queue_.empty()) { |
828 | background_work_cv_.Signal(); |
829 | } |
830 | |
831 | background_work_queue_.emplace(background_work_function, background_work_arg); |
832 | background_work_mutex_.Unlock(); |
833 | } |
834 | |
835 | void PosixEnv::BackgroundThreadMain() { |
836 | while (true) { |
837 | background_work_mutex_.Lock(); |
838 | |
839 | // Wait until there is work to be done. |
840 | while (background_work_queue_.empty()) { |
841 | background_work_cv_.Wait(); |
842 | } |
843 | |
844 | assert(!background_work_queue_.empty()); |
845 | auto background_work_function = background_work_queue_.front().function; |
846 | void* background_work_arg = background_work_queue_.front().arg; |
847 | background_work_queue_.pop(); |
848 | |
849 | background_work_mutex_.Unlock(); |
850 | background_work_function(background_work_arg); |
851 | } |
852 | } |
853 | |
854 | namespace { |
855 | |
856 | // Wraps an Env instance whose destructor is never created. |
857 | // |
858 | // Intended usage: |
859 | // using PlatformSingletonEnv = SingletonEnv<PlatformEnv>; |
860 | // void ConfigurePosixEnv(int param) { |
861 | // PlatformSingletonEnv::AssertEnvNotInitialized(); |
862 | // // set global configuration flags. |
863 | // } |
864 | // Env* Env::Default() { |
865 | // static PlatformSingletonEnv default_env; |
866 | // return default_env.env(); |
867 | // } |
868 | template <typename EnvType> |
869 | class SingletonEnv { |
870 | public: |
871 | SingletonEnv() { |
872 | #if !defined(NDEBUG) |
873 | env_initialized_.store(true, std::memory_order_relaxed); |
874 | #endif // !defined(NDEBUG) |
875 | static_assert(sizeof(env_storage_) >= sizeof(EnvType), |
876 | "env_storage_ will not fit the Env" ); |
877 | static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType), |
878 | "env_storage_ does not meet the Env's alignment needs" ); |
879 | new (&env_storage_) EnvType(); |
880 | } |
881 | ~SingletonEnv() = default; |
882 | |
883 | SingletonEnv(const SingletonEnv&) = delete; |
884 | SingletonEnv& operator=(const SingletonEnv&) = delete; |
885 | |
886 | Env* env() { return reinterpret_cast<Env*>(&env_storage_); } |
887 | |
888 | static void AssertEnvNotInitialized() { |
889 | #if !defined(NDEBUG) |
890 | assert(!env_initialized_.load(std::memory_order_relaxed)); |
891 | #endif // !defined(NDEBUG) |
892 | } |
893 | |
894 | private: |
895 | typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type |
896 | env_storage_; |
897 | #if !defined(NDEBUG) |
898 | static std::atomic<bool> env_initialized_; |
899 | #endif // !defined(NDEBUG) |
900 | }; |
901 | |
902 | #if !defined(NDEBUG) |
903 | template <typename EnvType> |
904 | std::atomic<bool> SingletonEnv<EnvType>::env_initialized_; |
905 | #endif // !defined(NDEBUG) |
906 | |
907 | using PosixDefaultEnv = SingletonEnv<PosixEnv>; |
908 | |
909 | } // namespace |
910 | |
// Overrides the maximum number of read-only file descriptors kept open.
//
// Must be called before the default Env is created; in debug builds this is
// checked with an assertion.
void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
  PosixDefaultEnv::AssertEnvNotInitialized();
  g_open_read_only_file_limit = limit;
}
915 | |
// Overrides the maximum number of mmap regions used for read-only files.
//
// Must be called before the default Env is created; in debug builds this is
// checked with an assertion.
void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
  PosixDefaultEnv::AssertEnvNotInitialized();
  g_mmap_limit = limit;
}
920 | |
921 | Env* Env::Default() { |
922 | static PosixDefaultEnv env_container; |
923 | return env_container.env(); |
924 | } |
925 | |
926 | } // namespace leveldb |
927 | |