1 | #pragma once |
---|---|
2 | #include "taichi/common/core.h" |
3 | #include "taichi/system/unified_allocator.h" |
4 | #define TI_RUNTIME_HOST |
5 | #include "taichi/runtime/llvm/runtime_module/mem_request.h" |
6 | #undef TI_RUNTIME_HOST |
7 | #include "taichi/rhi/device.h" |
8 | #include <mutex> |
9 | #include <vector> |
10 | #include <memory> |
11 | #include <thread> |
12 | |
13 | namespace taichi::lang { |
14 | |
15 | // A memory pool that runs on the host |
16 | |
17 | class TI_DLL_EXPORT MemoryPool { |
18 | public: |
19 | std::vector<std::unique_ptr<UnifiedAllocator>> allocators; |
20 | static constexpr std::size_t default_allocator_size = |
21 | 1 << 30; // 1 GB per allocator |
22 | bool terminating, killed; |
23 | std::mutex mut; |
24 | std::mutex mut_allocators; |
25 | std::unique_ptr<std::thread> th; |
26 | int processed_tail; |
27 | |
28 | MemRequestQueue *queue; |
29 | void *cuda_stream{nullptr}; |
30 | void *amdgpu_stream{nullptr}; |
31 | |
32 | // In the future we wish to move the MemoryPool inside each Device |
33 | // so that the memory allocated from each Device can be used as-is. |
34 | MemoryPool(Arch arch, Device *device); |
35 | |
36 | template <typename T> |
37 | T fetch(volatile void *ptr); |
38 | |
39 | template <typename T> |
40 | void push(volatile T *dest, const T &val); |
41 | |
42 | void *allocate(std::size_t size, std::size_t alignment); |
43 | |
44 | void set_queue(MemRequestQueue *queue); |
45 | |
46 | void daemon(); |
47 | |
48 | void terminate(); |
49 | |
50 | ~MemoryPool(); |
51 | |
52 | private: |
53 | static constexpr bool use_cuda_stream = false; |
54 | static constexpr bool use_amdgpu_stream = false; |
55 | Arch arch_; |
56 | Device *device_; |
57 | }; |
58 | |
59 | } // namespace taichi::lang |
60 |