#pragma once
#include "taichi/common/core.h"
#include "taichi/system/unified_allocator.h"
#define TI_RUNTIME_HOST
#include "taichi/runtime/llvm/runtime_module/mem_request.h"
#undef TI_RUNTIME_HOST
#include "taichi/rhi/device.h"
#include <mutex>
#include <vector>
#include <memory>
#include <thread>

namespace taichi::lang {

// A memory pool that runs on the host and serves allocation requests,
// including those posted by the device runtime through a MemRequestQueue.

class TI_DLL_EXPORT MemoryPool {
 public:
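  // Growable list of backing allocators; presumably a fresh
  // UnifiedAllocator of default_allocator_size bytes is appended once the
  // current one runs out.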
  std::vector<std::unique_ptr<UnifiedAllocator>> allocators;
  static constexpr std::size_t default_allocator_size =
      1 << 30;  // 1 GiB per allocator
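
  // Daemon-thread bookkeeping: 'terminating' requests shutdown, 'killed'
  // acknowledges it, 'mut' and 'mut_allocators' guard the request queue and
  // the allocator list respectively, and 'processed_tail' tracks how many
  // queue entries have been served (a descriptive reading of the
  // declarations below; the authoritative logic lives in the .cpp).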
  bool terminating, killed;
  std::mutex mut;
  std::mutex mut_allocators;
  std::unique_ptr<std::thread> th;
  int processed_tail;

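  // Request queue shared with the device runtime, plus raw stream handles
  // for CUDA/AMDGPU; the streams remain null while the use_*_stream flags
  // below are false (an inference from the private constants).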
  MemRequestQueue *queue;
  void *cuda_stream{nullptr};
  void *amdgpu_stream{nullptr};

  // In the future we wish to move the MemoryPool inside each Device
  // so that the memory allocated from each Device can be used as-is.
  MemoryPool(Arch arch, Device *device);

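  // Typed load/store through volatile pointers, for memory the device
  // runtime may mutate concurrently (intent inferred from the volatile
  // qualifiers; implementations live in the .cpp).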
  template <typename T>
  T fetch(volatile void *ptr);

  template <typename T>
  void push(volatile T *dest, const T &val);

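  // Returns a pointer to 'size' bytes with the requested 'alignment',
  // taken from the backing allocators.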
  void *allocate(std::size_t size, std::size_t alignment);

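  // Installs the MemRequestQueue that the daemon thread will poll.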
  void set_queue(MemRequestQueue *queue);

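  // Body of the background thread 'th': drains requests from 'queue',
  // advancing 'processed_tail' as they are fulfilled (a descriptive
  // summary; the authoritative loop is in the .cpp).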
  void daemon();

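  // Signals the daemon to stop and waits until it acknowledges
  // (inferred from the 'terminating'/'killed' flags above).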
  void terminate();

  ~MemoryPool();

 private:
  static constexpr bool use_cuda_stream = false;
  static constexpr bool use_amdgpu_stream = false;
  Arch arch_;
  Device *device_;
};
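
// Illustrative usage sketch (not part of this header; Arch::x64 and the
// nullptr device are assumptions drawn from the wider Taichi codebase):
//
//   MemoryPool pool(Arch::x64, /*device=*/nullptr);
//   void *buf = pool.allocate(/*size=*/4096, /*alignment=*/64);
//   // ... use buf; backing memory is released when the pool is destroyed.
//   pool.terminate();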

}  // namespace taichi::lang