1 | #pragma once |
2 | |
3 | #include <c10/core/Allocator.h> |
4 | #include <c10/cuda/CUDAStream.h> |
5 | |
6 | namespace at { |
7 | namespace cuda { |
8 | |
9 | // |
10 | // A caching allocator for CUDA host allocations (pinned memory). |
11 | // |
12 | // This provides a drop-in replacement for THCudaHostAllocator, which re-uses |
13 | // freed pinned (page-locked) memory allocations. This avoids device |
14 | // synchronizations due to cudaFreeHost calls. |
15 | // |
16 | // To ensure correct behavior, CachingHostAllocator_recordEvent must be |
17 | // called anytime a pointer from this allocator is used in a cudaMemcpyAsync |
18 | // call between host and device, and passed the corresponding context from the |
19 | // allocation. This is currently invoked by at::native::copy_kernel_cuda. |
20 | // |
21 | // Note that this allocator does not split larger allocations into smaller |
22 | // blocks, unlike the caching device allocator. |
23 | // |
24 | TORCH_CUDA_CPP_API c10::Allocator* getCachingHostAllocator(); |
25 | |
26 | // Records an event in the specified stream. The allocation corresponding to the |
27 | // input `ptr`/`ctx` will not be re-used until the event has occurred. |
28 | TORCH_CUDA_CPP_API bool |
29 | CachingHostAllocator_recordEvent(void* ptr, void* ctx, c10::cuda::CUDAStream stream); |
30 | |
31 | // Releases cached pinned memory allocations via cudaFreeHost |
32 | TORCH_CUDA_CPP_API void CachingHostAllocator_emptyCache(); |
33 | |
34 | inline TORCH_CUDA_CPP_API at::DataPtr HostAlloc(size_t size) { |
35 | return getCachingHostAllocator()->allocate(size); |
36 | } |
37 | |
38 | } // namespace cuda |
39 | } // namespace at |
40 | |