1 | #include "memory_pool.h" |
2 | #include "taichi/system/timer.h" |
3 | #include "taichi/rhi/cuda/cuda_driver.h" |
4 | #include "taichi/rhi/cuda/cuda_device.h" |
5 | |
6 | namespace taichi::lang { |
7 | |
// In the future we wish to move the MemoryPool inside each Device
// so that the memory allocated from each Device can be used as-is.
//
// Constructs the pool for `arch`, allocating through `device`, and spawns
// the daemon thread that services asynchronous allocation requests posted
// to the MemRequestQueue (see set_queue() / daemon()).
MemoryPool::MemoryPool(Arch arch, Device *device)
    : arch_(arch), device_(device) {
  TI_TRACE("Memory pool created. Default buffer size per allocator = {} MB" ,
           default_allocator_size / 1024 / 1024);
  terminating = false;
  killed = false;
  processed_tail = 0;
  // The queue is attached later via set_queue(); daemon() idles until then.
  queue = nullptr;
#if defined(TI_WITH_CUDA)
  // http://on-demand.gputechconf.com/gtc/2014/presentations/S4158-cuda-streams-best-practices-common-pitfalls.pdf
  // Stream 0 has special synchronization rules: Operations in stream 0 cannot
  // overlap other streams except for those streams with cudaStreamNonBlocking
  // Do not use cudaCreateStream (with no flags) here!
  if (use_cuda_stream && arch_ == Arch::cuda) {
    CUDADriver::get_instance().stream_create(&cuda_stream,
                                             CU_STREAM_NON_BLOCKING);
  }
#endif
  // Start the daemon thread last so it never observes a partially
  // initialized pool (flags, queue pointer, CUDA stream are all set above).
  th = std::make_unique<std::thread>([this] { this->daemon(); });
}
30 | |
31 | void MemoryPool::set_queue(MemRequestQueue *queue) { |
32 | std::lock_guard<std::mutex> _(mut); |
33 | this->queue = queue; |
34 | } |
35 | |
36 | void *MemoryPool::allocate(std::size_t size, std::size_t alignment) { |
37 | std::lock_guard<std::mutex> _(mut_allocators); |
38 | void *ret = nullptr; |
39 | if (!allocators.empty()) { |
40 | ret = allocators.back()->allocate(size, alignment); |
41 | } |
42 | if (!ret) { |
43 | // allocation have failed |
44 | auto new_buffer_size = std::max(size, default_allocator_size); |
45 | allocators.emplace_back( |
46 | std::make_unique<UnifiedAllocator>(new_buffer_size, arch_, device_)); |
47 | ret = allocators.back()->allocate(size, alignment); |
48 | } |
49 | TI_ASSERT(ret); |
50 | return ret; |
51 | } |
52 | |
// Reads a value of type T from `ptr`, which may live in CUDA device memory.
// On CUDA the read goes through the pool's non-blocking stream; elsewhere it
// is a plain host read through the (volatile) pointer.
template <typename T>
T MemoryPool::fetch(volatile void *ptr) {
  T ret;
  if (use_cuda_stream && arch_ == Arch::cuda) {
#if TI_WITH_CUDA
    // Drain work already enqueued on the stream so the device-side value is
    // final before we copy it out.
    CUDADriver::get_instance().stream_synchronize(cuda_stream);
    CUDADriver::get_instance().memcpy_device_to_host_async(
        &ret, (void *)ptr, sizeof(T), cuda_stream);
    // Wait for the async copy to land in `ret` before returning it.
    CUDADriver::get_instance().stream_synchronize(cuda_stream);
#else
    TI_NOT_IMPLEMENTED
#endif
  } else {
    // Host memory: a direct read suffices.
    ret = *(T *)ptr;
  }
  return ret;
}
70 | |
// Writes `val` to `dest`, which may live in CUDA device memory. On CUDA the
// write is an async host-to-device copy on the pool's stream followed by a
// synchronize, so the value is visible on the device before returning.
template <typename T>
void MemoryPool::push(volatile T *dest, const T &val) {
  if (use_cuda_stream && arch_ == Arch::cuda) {
#if TI_WITH_CUDA
    CUDADriver::get_instance().memcpy_host_to_device_async(
        (void *)(dest), (void *)&val, sizeof(T), cuda_stream);
    // Block until the copy completes; `val` is a stack reference and must not
    // be read by the driver after this function returns.
    CUDADriver::get_instance().stream_synchronize(cuda_stream);
#else
    TI_NOT_IMPLEMENTED
#endif
  } else {
    // Host memory: a direct write suffices.
    *(T *)dest = val;
  }
}
85 | |
// Daemon thread body: polls the request queue roughly every millisecond and
// fulfills at most one pending allocation per iteration, writing the
// resulting pointer back into the request slot via push(). Exits — setting
// `killed` as an acknowledgement — once terminate() raises `terminating`.
void MemoryPool::daemon() {
  while (true) {
    Time::usleep(1000);
    // Hold `mut` for the whole iteration: it orders us against set_queue()
    // and the terminate() shutdown handshake.
    std::lock_guard<std::mutex> _(mut);
    if (terminating) {
      killed = true;
      break;
    }
    if (!queue) {
      // No queue attached yet (set_queue() not called); keep idling.
      continue;
    }

    // poll allocation requests.
    using tail_type = decltype(MemRequestQueue::tail);
    auto tail = fetch<tail_type>(&queue->tail);
    if (tail > processed_tail) {
      // allocate new buffer
      auto i = processed_tail;
      TI_DEBUG("Processing memory alloc request {}" , i);
      auto req = fetch<MemRequest>(&queue->requests[i]);
      if (req.size == 0 || req.alignment == 0) {
        // The producer bumped `tail` before finishing the request body;
        // leave `processed_tail` untouched and retry on the next poll rather
        // than acting on a half-written entry.
        TI_DEBUG(" Incomplete memory alloc request {} fetched. Skipping" , i);
        continue;
      }
      TI_DEBUG(" Allocating memory {} B (alignment {}B) " , req.size,
               req.alignment);
      auto ptr = allocate(req.size, req.alignment);
      TI_DEBUG(" Allocated. Ptr = {:p}" , ptr);
      // Publish the pointer back into the request slot for the requester.
      push(&queue->requests[i].ptr, (uint8 *)ptr);
      processed_tail += 1;
    }
  }
}
119 | |
// Stops the daemon thread: raises `terminating` under the mutex, joins the
// thread, and asserts it acknowledged shutdown by setting `killed`.
void MemoryPool::terminate() {
  {
    std::lock_guard<std::mutex> _(mut);
    terminating = true;
  }
  // Join outside the lock so daemon() can acquire `mut` to observe the flag.
  th->join();
  TI_ASSERT(killed);
#if 0 && defined(TI_WITH_CUDA)
  // NOTE(review): this destruction path is compiled out (#if 0), so the CUDA
  // stream created in the constructor is never destroyed — presumably a
  // deliberate workaround; confirm before re-enabling.
  if (arch_ == Arch::cuda)
    CUDADriver::get_instance().cudaStreamDestroy(cuda_stream);
#endif
}
132 | |
133 | MemoryPool::~MemoryPool() { |
134 | if (!killed) { |
135 | terminate(); |
136 | } |
137 | } |
138 | |
139 | } // namespace taichi::lang |
140 | |