1// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5#include <dirent.h>
6#include <fcntl.h>
7#include <sys/mman.h>
8#ifndef __Fuchsia__
9#include <sys/resource.h>
10#endif
11#include <sys/stat.h>
12#include <sys/time.h>
13#include <sys/types.h>
14#include <unistd.h>
15
16#include <atomic>
17#include <cerrno>
18#include <cstddef>
19#include <cstdint>
20#include <cstdio>
21#include <cstdlib>
22#include <cstring>
23#include <limits>
24#include <queue>
25#include <set>
26#include <string>
27#include <thread>
28#include <type_traits>
29#include <utility>
30
31#include "leveldb/env.h"
32#include "leveldb/slice.h"
33#include "leveldb/status.h"
34#include "port/port.h"
35#include "port/thread_annotations.h"
36#include "util/env_posix_test_helper.h"
37#include "util/posix_logger.h"
38
39namespace leveldb {
40
41namespace {
42
// Cap on the number of read-only files kept open. Starts out negative, which
// MaxOpenFiles() treats as "not determined yet". Set by
// EnvPosixTestHelper::SetReadOnlyFDLimit() and MaxOpenFiles().
int g_open_read_only_file_limit = -1;

// Default mmap budget: up to 1000 regions for 64-bit binaries, none for 32-bit.
constexpr int kDefaultMmapLimit = (sizeof(void*) >= 8) ? 1000 : 0;

// Effective mmap budget. Can be changed using
// EnvPosixTestHelper::SetReadOnlyMMapLimit().
int g_mmap_limit = kDefaultMmapLimit;

// Flags OR-ed into every open() call made in this file.
#if defined(HAVE_O_CLOEXEC)
constexpr int kOpenBaseFlags = O_CLOEXEC;
#else
constexpr int kOpenBaseFlags = 0;
#endif  // defined(HAVE_O_CLOEXEC)

// Size, in bytes, of the write buffer embedded in each PosixWritableFile.
constexpr size_t kWritableFileBufferSize = 65536;
60
61Status PosixError(const std::string& context, int error_number) {
62 if (error_number == ENOENT) {
63 return Status::NotFound(context, std::strerror(error_number));
64 } else {
65 return Status::IOError(context, std::strerror(error_number));
66 }
67}
68
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not run out of file descriptors or virtual memory, or run into
// kernel performance problems for very large databases.
//
// All state lives in a single atomic counter, so Acquire() and Release() may
// be called concurrently from multiple threads.
class Limiter {
 public:
  // Limit maximum number of resources to |max_acquires|, which must be >= 0.
  //
  // The constructor is explicit so that an int is never silently converted
  // into a Limiter.
  explicit Limiter(int max_acquires)
      :
#if !defined(NDEBUG)
        max_acquires_(max_acquires),
#endif  // !defined(NDEBUG)
        acquires_allowed_(max_acquires) {
    assert(max_acquires >= 0);
  }

  // Not copyable: a copy would duplicate the resource budget.
  Limiter(const Limiter&) = delete;
  Limiter& operator=(const Limiter&) = delete;

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Optimistically take a slot; the value returned is the count before the
    // decrement.
    int old_acquires_allowed =
        acquires_allowed_.fetch_sub(1, std::memory_order_relaxed);

    if (old_acquires_allowed > 0) return true;

    // No slot was actually available, so undo the decrement.
    int pre_increment_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused arguments when NDEBUG is defined.
    (void)pre_increment_acquires_allowed;
    // If the check below fails, Release() was called more times than acquire.
    assert(pre_increment_acquires_allowed < max_acquires_);

    return false;
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() {
    int old_acquires_allowed =
        acquires_allowed_.fetch_add(1, std::memory_order_relaxed);

    // Silence compiler warnings about unused arguments when NDEBUG is defined.
    (void)old_acquires_allowed;
    // If the check below fails, Release() was called more times than acquire.
    assert(old_acquires_allowed < max_acquires_);
  }

 private:
#if !defined(NDEBUG)
  // Catches an excessive number of Release() calls.
  const int max_acquires_;
#endif  // !defined(NDEBUG)

  // The number of available resources.
  //
  // This is a counter and is not tied to the invariants of any other class, so
  // it can be operated on safely using std::memory_order_relaxed.
  std::atomic<int> acquires_allowed_;
};
131
132// Implements sequential read access in a file using read().
133//
134// Instances of this class are thread-friendly but not thread-safe, as required
135// by the SequentialFile API.
136class PosixSequentialFile final : public SequentialFile {
137 public:
138 PosixSequentialFile(std::string filename, int fd)
139 : fd_(fd), filename_(std::move(filename)) {}
140 ~PosixSequentialFile() override { close(fd_); }
141
142 Status Read(size_t n, Slice* result, char* scratch) override {
143 Status status;
144 while (true) {
145 ::ssize_t read_size = ::read(fd_, scratch, n);
146 if (read_size < 0) { // Read error.
147 if (errno == EINTR) {
148 continue; // Retry
149 }
150 status = PosixError(filename_, errno);
151 break;
152 }
153 *result = Slice(scratch, read_size);
154 break;
155 }
156 return status;
157 }
158
159 Status Skip(uint64_t n) override {
160 if (::lseek(fd_, n, SEEK_CUR) == static_cast<off_t>(-1)) {
161 return PosixError(filename_, errno);
162 }
163 return Status::OK();
164 }
165
166 private:
167 const int fd_;
168 const std::string filename_;
169};
170
171// Implements random read access in a file using pread().
172//
173// Instances of this class are thread-safe, as required by the RandomAccessFile
174// API. Instances are immutable and Read() only calls thread-safe library
175// functions.
176class PosixRandomAccessFile final : public RandomAccessFile {
177 public:
178 // The new instance takes ownership of |fd|. |fd_limiter| must outlive this
179 // instance, and will be used to determine if .
180 PosixRandomAccessFile(std::string filename, int fd, Limiter* fd_limiter)
181 : has_permanent_fd_(fd_limiter->Acquire()),
182 fd_(has_permanent_fd_ ? fd : -1),
183 fd_limiter_(fd_limiter),
184 filename_(std::move(filename)) {
185 if (!has_permanent_fd_) {
186 assert(fd_ == -1);
187 ::close(fd); // The file will be opened on every read.
188 }
189 }
190
191 ~PosixRandomAccessFile() override {
192 if (has_permanent_fd_) {
193 assert(fd_ != -1);
194 ::close(fd_);
195 fd_limiter_->Release();
196 }
197 }
198
199 Status Read(uint64_t offset, size_t n, Slice* result,
200 char* scratch) const override {
201 int fd = fd_;
202 if (!has_permanent_fd_) {
203 fd = ::open(filename_.c_str(), O_RDONLY | kOpenBaseFlags);
204 if (fd < 0) {
205 return PosixError(filename_, errno);
206 }
207 }
208
209 assert(fd != -1);
210
211 Status status;
212 ssize_t read_size = ::pread(fd, scratch, n, static_cast<off_t>(offset));
213 *result = Slice(scratch, (read_size < 0) ? 0 : read_size);
214 if (read_size < 0) {
215 // An error: return a non-ok status.
216 status = PosixError(filename_, errno);
217 }
218 if (!has_permanent_fd_) {
219 // Close the temporary file descriptor opened earlier.
220 assert(fd != fd_);
221 ::close(fd);
222 }
223 return status;
224 }
225
226 private:
227 const bool has_permanent_fd_; // If false, the file is opened on every read.
228 const int fd_; // -1 if has_permanent_fd_ is false.
229 Limiter* const fd_limiter_;
230 const std::string filename_;
231};
232
233// Implements random read access in a file using mmap().
234//
235// Instances of this class are thread-safe, as required by the RandomAccessFile
236// API. Instances are immutable and Read() only calls thread-safe library
237// functions.
238class PosixMmapReadableFile final : public RandomAccessFile {
239 public:
240 // mmap_base[0, length-1] points to the memory-mapped contents of the file. It
241 // must be the result of a successful call to mmap(). This instances takes
242 // over the ownership of the region.
243 //
244 // |mmap_limiter| must outlive this instance. The caller must have already
245 // acquired the right to use one mmap region, which will be released when this
246 // instance is destroyed.
247 PosixMmapReadableFile(std::string filename, char* mmap_base, size_t length,
248 Limiter* mmap_limiter)
249 : mmap_base_(mmap_base),
250 length_(length),
251 mmap_limiter_(mmap_limiter),
252 filename_(std::move(filename)) {}
253
254 ~PosixMmapReadableFile() override {
255 ::munmap(static_cast<void*>(mmap_base_), length_);
256 mmap_limiter_->Release();
257 }
258
259 Status Read(uint64_t offset, size_t n, Slice* result,
260 char* scratch) const override {
261 if (offset + n > length_) {
262 *result = Slice();
263 return PosixError(filename_, EINVAL);
264 }
265
266 *result = Slice(mmap_base_ + offset, n);
267 return Status::OK();
268 }
269
270 private:
271 char* const mmap_base_;
272 const size_t length_;
273 Limiter* const mmap_limiter_;
274 const std::string filename_;
275};
276
277class PosixWritableFile final : public WritableFile {
278 public:
279 PosixWritableFile(std::string filename, int fd)
280 : pos_(0),
281 fd_(fd),
282 is_manifest_(IsManifest(filename)),
283 filename_(std::move(filename)),
284 dirname_(Dirname(filename_)) {}
285
286 ~PosixWritableFile() override {
287 if (fd_ >= 0) {
288 // Ignoring any potential errors
289 Close();
290 }
291 }
292
293 Status Append(const Slice& data) override {
294 size_t write_size = data.size();
295 const char* write_data = data.data();
296
297 // Fit as much as possible into buffer.
298 size_t copy_size = std::min(write_size, kWritableFileBufferSize - pos_);
299 std::memcpy(buf_ + pos_, write_data, copy_size);
300 write_data += copy_size;
301 write_size -= copy_size;
302 pos_ += copy_size;
303 if (write_size == 0) {
304 return Status::OK();
305 }
306
307 // Can't fit in buffer, so need to do at least one write.
308 Status status = FlushBuffer();
309 if (!status.ok()) {
310 return status;
311 }
312
313 // Small writes go to buffer, large writes are written directly.
314 if (write_size < kWritableFileBufferSize) {
315 std::memcpy(buf_, write_data, write_size);
316 pos_ = write_size;
317 return Status::OK();
318 }
319 return WriteUnbuffered(write_data, write_size);
320 }
321
322 Status Close() override {
323 Status status = FlushBuffer();
324 const int close_result = ::close(fd_);
325 if (close_result < 0 && status.ok()) {
326 status = PosixError(filename_, errno);
327 }
328 fd_ = -1;
329 return status;
330 }
331
332 Status Flush() override { return FlushBuffer(); }
333
334 Status Sync() override {
335 // Ensure new files referred to by the manifest are in the filesystem.
336 //
337 // This needs to happen before the manifest file is flushed to disk, to
338 // avoid crashing in a state where the manifest refers to files that are not
339 // yet on disk.
340 Status status = SyncDirIfManifest();
341 if (!status.ok()) {
342 return status;
343 }
344
345 status = FlushBuffer();
346 if (!status.ok()) {
347 return status;
348 }
349
350 return SyncFd(fd_, filename_);
351 }
352
353 private:
354 Status FlushBuffer() {
355 Status status = WriteUnbuffered(buf_, pos_);
356 pos_ = 0;
357 return status;
358 }
359
360 Status WriteUnbuffered(const char* data, size_t size) {
361 while (size > 0) {
362 ssize_t write_result = ::write(fd_, data, size);
363 if (write_result < 0) {
364 if (errno == EINTR) {
365 continue; // Retry
366 }
367 return PosixError(filename_, errno);
368 }
369 data += write_result;
370 size -= write_result;
371 }
372 return Status::OK();
373 }
374
375 Status SyncDirIfManifest() {
376 Status status;
377 if (!is_manifest_) {
378 return status;
379 }
380
381 int fd = ::open(dirname_.c_str(), O_RDONLY | kOpenBaseFlags);
382 if (fd < 0) {
383 status = PosixError(dirname_, errno);
384 } else {
385 status = SyncFd(fd, dirname_);
386 ::close(fd);
387 }
388 return status;
389 }
390
391 // Ensures that all the caches associated with the given file descriptor's
392 // data are flushed all the way to durable media, and can withstand power
393 // failures.
394 //
395 // The path argument is only used to populate the description string in the
396 // returned Status if an error occurs.
397 static Status SyncFd(int fd, const std::string& fd_path) {
398#if HAVE_FULLFSYNC
399 // On macOS and iOS, fsync() doesn't guarantee durability past power
400 // failures. fcntl(F_FULLFSYNC) is required for that purpose. Some
401 // filesystems don't support fcntl(F_FULLFSYNC), and require a fallback to
402 // fsync().
403 if (::fcntl(fd, F_FULLFSYNC) == 0) {
404 return Status::OK();
405 }
406#endif // HAVE_FULLFSYNC
407
408#if HAVE_FDATASYNC
409 bool sync_success = ::fdatasync(fd) == 0;
410#else
411 bool sync_success = ::fsync(fd) == 0;
412#endif // HAVE_FDATASYNC
413
414 if (sync_success) {
415 return Status::OK();
416 }
417 return PosixError(fd_path, errno);
418 }
419
420 // Returns the directory name in a path pointing to a file.
421 //
422 // Returns "." if the path does not contain any directory separator.
423 static std::string Dirname(const std::string& filename) {
424 std::string::size_type separator_pos = filename.rfind('/');
425 if (separator_pos == std::string::npos) {
426 return std::string(".");
427 }
428 // The filename component should not contain a path separator. If it does,
429 // the splitting was done incorrectly.
430 assert(filename.find('/', separator_pos + 1) == std::string::npos);
431
432 return filename.substr(0, separator_pos);
433 }
434
435 // Extracts the file name from a path pointing to a file.
436 //
437 // The returned Slice points to |filename|'s data buffer, so it is only valid
438 // while |filename| is alive and unchanged.
439 static Slice Basename(const std::string& filename) {
440 std::string::size_type separator_pos = filename.rfind('/');
441 if (separator_pos == std::string::npos) {
442 return Slice(filename);
443 }
444 // The filename component should not contain a path separator. If it does,
445 // the splitting was done incorrectly.
446 assert(filename.find('/', separator_pos + 1) == std::string::npos);
447
448 return Slice(filename.data() + separator_pos + 1,
449 filename.length() - separator_pos - 1);
450 }
451
452 // True if the given file is a manifest file.
453 static bool IsManifest(const std::string& filename) {
454 return Basename(filename).starts_with("MANIFEST");
455 }
456
457 // buf_[0, pos_ - 1] contains data to be written to fd_.
458 char buf_[kWritableFileBufferSize];
459 size_t pos_;
460 int fd_;
461
462 const bool is_manifest_; // True if the file's name starts with MANIFEST.
463 const std::string filename_;
464 const std::string dirname_; // The directory of filename_.
465};
466
// Applies (|lock| == true) or removes (|lock| == false) a write lock covering
// the whole file referred to by |fd|, using a non-blocking fcntl(F_SETLK)
// request.
//
// Returns the fcntl() result: -1 on failure, with errno describing the error.
// errno is reset to 0 before the call.
int LockOrUnlock(int fd, bool lock) {
  errno = 0;

  struct ::flock request;
  std::memset(&request, 0, sizeof(request));
  if (lock) {
    request.l_type = F_WRLCK;
  } else {
    request.l_type = F_UNLCK;
  }
  // l_start == 0 relative to SEEK_SET with l_len == 0 covers the entire file.
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0;

  return ::fcntl(fd, F_SETLK, &request);
}
477
478// Instances are thread-safe because they are immutable.
479class PosixFileLock : public FileLock {
480 public:
481 PosixFileLock(int fd, std::string filename)
482 : fd_(fd), filename_(std::move(filename)) {}
483
484 int fd() const { return fd_; }
485 const std::string& filename() const { return filename_; }
486
487 private:
488 const int fd_;
489 const std::string filename_;
490};
491
492// Tracks the files locked by PosixEnv::LockFile().
493//
494// We maintain a separate set instead of relying on fcntl(F_SETLK) because
495// fcntl(F_SETLK) does not provide any protection against multiple uses from the
496// same process.
497//
498// Instances are thread-safe because all member data is guarded by a mutex.
499class PosixLockTable {
500 public:
501 bool Insert(const std::string& fname) LOCKS_EXCLUDED(mu_) {
502 mu_.Lock();
503 bool succeeded = locked_files_.insert(fname).second;
504 mu_.Unlock();
505 return succeeded;
506 }
507 void Remove(const std::string& fname) LOCKS_EXCLUDED(mu_) {
508 mu_.Lock();
509 locked_files_.erase(fname);
510 mu_.Unlock();
511 }
512
513 private:
514 port::Mutex mu_;
515 std::set<std::string> locked_files_ GUARDED_BY(mu_);
516};
517
518class PosixEnv : public Env {
519 public:
520 PosixEnv();
521 ~PosixEnv() override {
522 static const char msg[] =
523 "PosixEnv singleton destroyed. Unsupported behavior!\n";
524 std::fwrite(msg, 1, sizeof(msg), stderr);
525 std::abort();
526 }
527
528 Status NewSequentialFile(const std::string& filename,
529 SequentialFile** result) override {
530 int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
531 if (fd < 0) {
532 *result = nullptr;
533 return PosixError(filename, errno);
534 }
535
536 *result = new PosixSequentialFile(filename, fd);
537 return Status::OK();
538 }
539
540 Status NewRandomAccessFile(const std::string& filename,
541 RandomAccessFile** result) override {
542 *result = nullptr;
543 int fd = ::open(filename.c_str(), O_RDONLY | kOpenBaseFlags);
544 if (fd < 0) {
545 return PosixError(filename, errno);
546 }
547
548 if (!mmap_limiter_.Acquire()) {
549 *result = new PosixRandomAccessFile(filename, fd, &fd_limiter_);
550 return Status::OK();
551 }
552
553 uint64_t file_size;
554 Status status = GetFileSize(filename, &file_size);
555 if (status.ok()) {
556 void* mmap_base =
557 ::mmap(/*addr=*/nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
558 if (mmap_base != MAP_FAILED) {
559 *result = new PosixMmapReadableFile(filename,
560 reinterpret_cast<char*>(mmap_base),
561 file_size, &mmap_limiter_);
562 } else {
563 status = PosixError(filename, errno);
564 }
565 }
566 ::close(fd);
567 if (!status.ok()) {
568 mmap_limiter_.Release();
569 }
570 return status;
571 }
572
573 Status NewWritableFile(const std::string& filename,
574 WritableFile** result) override {
575 int fd = ::open(filename.c_str(),
576 O_TRUNC | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
577 if (fd < 0) {
578 *result = nullptr;
579 return PosixError(filename, errno);
580 }
581
582 *result = new PosixWritableFile(filename, fd);
583 return Status::OK();
584 }
585
586 Status NewAppendableFile(const std::string& filename,
587 WritableFile** result) override {
588 int fd = ::open(filename.c_str(),
589 O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
590 if (fd < 0) {
591 *result = nullptr;
592 return PosixError(filename, errno);
593 }
594
595 *result = new PosixWritableFile(filename, fd);
596 return Status::OK();
597 }
598
599 bool FileExists(const std::string& filename) override {
600 return ::access(filename.c_str(), F_OK) == 0;
601 }
602
603 Status GetChildren(const std::string& directory_path,
604 std::vector<std::string>* result) override {
605 result->clear();
606 ::DIR* dir = ::opendir(directory_path.c_str());
607 if (dir == nullptr) {
608 return PosixError(directory_path, errno);
609 }
610 struct ::dirent* entry;
611 while ((entry = ::readdir(dir)) != nullptr) {
612 result->emplace_back(entry->d_name);
613 }
614 ::closedir(dir);
615 return Status::OK();
616 }
617
618 Status RemoveFile(const std::string& filename) override {
619 if (::unlink(filename.c_str()) != 0) {
620 return PosixError(filename, errno);
621 }
622 return Status::OK();
623 }
624
625 Status CreateDir(const std::string& dirname) override {
626 if (::mkdir(dirname.c_str(), 0755) != 0) {
627 return PosixError(dirname, errno);
628 }
629 return Status::OK();
630 }
631
632 Status RemoveDir(const std::string& dirname) override {
633 if (::rmdir(dirname.c_str()) != 0) {
634 return PosixError(dirname, errno);
635 }
636 return Status::OK();
637 }
638
639 Status GetFileSize(const std::string& filename, uint64_t* size) override {
640 struct ::stat file_stat;
641 if (::stat(filename.c_str(), &file_stat) != 0) {
642 *size = 0;
643 return PosixError(filename, errno);
644 }
645 *size = file_stat.st_size;
646 return Status::OK();
647 }
648
649 Status RenameFile(const std::string& from, const std::string& to) override {
650 if (std::rename(from.c_str(), to.c_str()) != 0) {
651 return PosixError(from, errno);
652 }
653 return Status::OK();
654 }
655
656 Status LockFile(const std::string& filename, FileLock** lock) override {
657 *lock = nullptr;
658
659 int fd = ::open(filename.c_str(), O_RDWR | O_CREAT | kOpenBaseFlags, 0644);
660 if (fd < 0) {
661 return PosixError(filename, errno);
662 }
663
664 if (!locks_.Insert(filename)) {
665 ::close(fd);
666 return Status::IOError("lock " + filename, "already held by process");
667 }
668
669 if (LockOrUnlock(fd, true) == -1) {
670 int lock_errno = errno;
671 ::close(fd);
672 locks_.Remove(filename);
673 return PosixError("lock " + filename, lock_errno);
674 }
675
676 *lock = new PosixFileLock(fd, filename);
677 return Status::OK();
678 }
679
680 Status UnlockFile(FileLock* lock) override {
681 PosixFileLock* posix_file_lock = static_cast<PosixFileLock*>(lock);
682 if (LockOrUnlock(posix_file_lock->fd(), false) == -1) {
683 return PosixError("unlock " + posix_file_lock->filename(), errno);
684 }
685 locks_.Remove(posix_file_lock->filename());
686 ::close(posix_file_lock->fd());
687 delete posix_file_lock;
688 return Status::OK();
689 }
690
691 void Schedule(void (*background_work_function)(void* background_work_arg),
692 void* background_work_arg) override;
693
694 void StartThread(void (*thread_main)(void* thread_main_arg),
695 void* thread_main_arg) override {
696 std::thread new_thread(thread_main, thread_main_arg);
697 new_thread.detach();
698 }
699
700 Status GetTestDirectory(std::string* result) override {
701 const char* env = std::getenv("TEST_TMPDIR");
702 if (env && env[0] != '\0') {
703 *result = env;
704 } else {
705 char buf[100];
706 std::snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d",
707 static_cast<int>(::geteuid()));
708 *result = buf;
709 }
710
711 // The CreateDir status is ignored because the directory may already exist.
712 CreateDir(*result);
713
714 return Status::OK();
715 }
716
717 Status NewLogger(const std::string& filename, Logger** result) override {
718 int fd = ::open(filename.c_str(),
719 O_APPEND | O_WRONLY | O_CREAT | kOpenBaseFlags, 0644);
720 if (fd < 0) {
721 *result = nullptr;
722 return PosixError(filename, errno);
723 }
724
725 std::FILE* fp = ::fdopen(fd, "w");
726 if (fp == nullptr) {
727 ::close(fd);
728 *result = nullptr;
729 return PosixError(filename, errno);
730 } else {
731 *result = new PosixLogger(fp);
732 return Status::OK();
733 }
734 }
735
736 uint64_t NowMicros() override {
737 static constexpr uint64_t kUsecondsPerSecond = 1000000;
738 struct ::timeval tv;
739 ::gettimeofday(&tv, nullptr);
740 return static_cast<uint64_t>(tv.tv_sec) * kUsecondsPerSecond + tv.tv_usec;
741 }
742
743 void SleepForMicroseconds(int micros) override {
744 std::this_thread::sleep_for(std::chrono::microseconds(micros));
745 }
746
747 private:
748 void BackgroundThreadMain();
749
750 static void BackgroundThreadEntryPoint(PosixEnv* env) {
751 env->BackgroundThreadMain();
752 }
753
754 // Stores the work item data in a Schedule() call.
755 //
756 // Instances are constructed on the thread calling Schedule() and used on the
757 // background thread.
758 //
759 // This structure is thread-safe because it is immutable.
760 struct BackgroundWorkItem {
761 explicit BackgroundWorkItem(void (*function)(void* arg), void* arg)
762 : function(function), arg(arg) {}
763
764 void (*const function)(void*);
765 void* const arg;
766 };
767
768 port::Mutex background_work_mutex_;
769 port::CondVar background_work_cv_ GUARDED_BY(background_work_mutex_);
770 bool started_background_thread_ GUARDED_BY(background_work_mutex_);
771
772 std::queue<BackgroundWorkItem> background_work_queue_
773 GUARDED_BY(background_work_mutex_);
774
775 PosixLockTable locks_; // Thread-safe.
776 Limiter mmap_limiter_; // Thread-safe.
777 Limiter fd_limiter_; // Thread-safe.
778};
779
780// Return the maximum number of concurrent mmaps.
781int MaxMmaps() { return g_mmap_limit; }
782
783// Return the maximum number of read-only files to keep open.
784int MaxOpenFiles() {
785 if (g_open_read_only_file_limit >= 0) {
786 return g_open_read_only_file_limit;
787 }
788#ifdef __Fuchsia__
789 // Fuchsia doesn't implement getrlimit.
790 g_open_read_only_file_limit = 50;
791#else
792 struct ::rlimit rlim;
793 if (::getrlimit(RLIMIT_NOFILE, &rlim)) {
794 // getrlimit failed, fallback to hard-coded default.
795 g_open_read_only_file_limit = 50;
796 } else if (rlim.rlim_cur == RLIM_INFINITY) {
797 g_open_read_only_file_limit = std::numeric_limits<int>::max();
798 } else {
799 // Allow use of 20% of available file descriptors for read-only files.
800 g_open_read_only_file_limit = rlim.rlim_cur / 5;
801 }
802#endif
803 return g_open_read_only_file_limit;
804}
805
806} // namespace
807
808PosixEnv::PosixEnv()
809 : background_work_cv_(&background_work_mutex_),
810 started_background_thread_(false),
811 mmap_limiter_(MaxMmaps()),
812 fd_limiter_(MaxOpenFiles()) {}
813
814void PosixEnv::Schedule(
815 void (*background_work_function)(void* background_work_arg),
816 void* background_work_arg) {
817 background_work_mutex_.Lock();
818
819 // Start the background thread, if we haven't done so already.
820 if (!started_background_thread_) {
821 started_background_thread_ = true;
822 std::thread background_thread(PosixEnv::BackgroundThreadEntryPoint, this);
823 background_thread.detach();
824 }
825
826 // If the queue is empty, the background thread may be waiting for work.
827 if (background_work_queue_.empty()) {
828 background_work_cv_.Signal();
829 }
830
831 background_work_queue_.emplace(background_work_function, background_work_arg);
832 background_work_mutex_.Unlock();
833}
834
835void PosixEnv::BackgroundThreadMain() {
836 while (true) {
837 background_work_mutex_.Lock();
838
839 // Wait until there is work to be done.
840 while (background_work_queue_.empty()) {
841 background_work_cv_.Wait();
842 }
843
844 assert(!background_work_queue_.empty());
845 auto background_work_function = background_work_queue_.front().function;
846 void* background_work_arg = background_work_queue_.front().arg;
847 background_work_queue_.pop();
848
849 background_work_mutex_.Unlock();
850 background_work_function(background_work_arg);
851 }
852}
853
854namespace {
855
856// Wraps an Env instance whose destructor is never created.
857//
858// Intended usage:
859// using PlatformSingletonEnv = SingletonEnv<PlatformEnv>;
860// void ConfigurePosixEnv(int param) {
861// PlatformSingletonEnv::AssertEnvNotInitialized();
862// // set global configuration flags.
863// }
864// Env* Env::Default() {
865// static PlatformSingletonEnv default_env;
866// return default_env.env();
867// }
868template <typename EnvType>
869class SingletonEnv {
870 public:
871 SingletonEnv() {
872#if !defined(NDEBUG)
873 env_initialized_.store(true, std::memory_order_relaxed);
874#endif // !defined(NDEBUG)
875 static_assert(sizeof(env_storage_) >= sizeof(EnvType),
876 "env_storage_ will not fit the Env");
877 static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType),
878 "env_storage_ does not meet the Env's alignment needs");
879 new (&env_storage_) EnvType();
880 }
881 ~SingletonEnv() = default;
882
883 SingletonEnv(const SingletonEnv&) = delete;
884 SingletonEnv& operator=(const SingletonEnv&) = delete;
885
886 Env* env() { return reinterpret_cast<Env*>(&env_storage_); }
887
888 static void AssertEnvNotInitialized() {
889#if !defined(NDEBUG)
890 assert(!env_initialized_.load(std::memory_order_relaxed));
891#endif // !defined(NDEBUG)
892 }
893
894 private:
895 typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type
896 env_storage_;
897#if !defined(NDEBUG)
898 static std::atomic<bool> env_initialized_;
899#endif // !defined(NDEBUG)
900};
901
902#if !defined(NDEBUG)
903template <typename EnvType>
904std::atomic<bool> SingletonEnv<EnvType>::env_initialized_;
905#endif // !defined(NDEBUG)
906
907using PosixDefaultEnv = SingletonEnv<PosixEnv>;
908
909} // namespace
910
911void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
912 PosixDefaultEnv::AssertEnvNotInitialized();
913 g_open_read_only_file_limit = limit;
914}
915
916void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
917 PosixDefaultEnv::AssertEnvNotInitialized();
918 g_mmap_limit = limit;
919}
920
921Env* Env::Default() {
922 static PosixDefaultEnv env_container;
923 return env_container.env();
924}
925
926} // namespace leveldb
927