1 | #pragma once |
2 | |
3 | #include <algorithm> |
4 | #include <deque> |
5 | #include <memory> |
6 | #include <mutex> |
7 | |
8 | #include <c10/util/Exception.h> |
9 | #include <c10/util/SmallVector.h> |
10 | #include <c10/util/flat_hash_map.h> |
11 | |
12 | /* |
13 | * CPUCachingAllocator: |
14 | * DISCLAIMER: |
15 | * This is subject to change (beta) and only supported on mobile builds. |
 * If a code snippet such as the one in 'Usage pattern' is used outside of
 * a mobile build you will not observe the intended behavior.
18 | * See below for more information. |
19 | * Why? |
20 | * It has been observed that some mobile platforms, such as pixel 3, return |
21 | * memory aggressively to the system. This results in page faults in some |
22 | * cases and ends up hurting performance. This caching allocator aims to address |
23 | * that. Furthermore it also allows users to specify their own allocator by |
24 | * implementing allocate/free virtual interfaces. What are the cons? There are |
25 | * some cons that were observed where use of caching allocator led to worse |
26 | * performance on some platforms. Reason being that the caching mechanism used |
27 | * by this allocator left us worse off compared to the corresponding platform's |
28 | * tuned memory allocator. In that case it seemed better to not use this |
29 | * allocator. Note there are some ideas to fix this in the works. |
30 | * |
31 | * Usage: |
32 | * Usage pattern: |
33 | * Instantiate and own the caching allocator. |
34 | * std::unique_ptr<c10::CPUCachingAllocator> caching_allocator = |
35 | * std::make_unique<c10::CPUCachingAllocator>(); |
36 | * Use caching allocator with a scoped guard at inference time. |
37 | * { |
 *   WithCPUCachingAllocatorGuard guard(caching_allocator.get());
39 | * ... model.forward(...); |
40 | * } |
41 | */ |
42 | |
43 | namespace c10 { |
44 | |
class C10_API CPUCachingAllocator {
  /*
   * What it does:
   * Caches all the allocations carried out by this allocator.
   * Cache key is the size of the allocation.
   * If requested size is found in the cache returns the cached pointer.
   * What it does not do:
   * No speculative allocation for any future allocations.
   */
 private:
  // Allocates `bytes` from the system and records the (ptr -> size)
  // mapping so a later free() can recognize and cache the pointer.
  inline void* allocate_and_cache(const size_t bytes);
  // Returns all cached (currently unused) memory back to the OS.
  void free_cached();

 protected:
  // Invariants.
  // 1. If memory is ever allocated via this allocator then
  //    the pointer will exist in allocation_map_, unless the allocator
  //    returned the memory to OS via free_cached.
  //  1.1. Therefore even when the said memory is "freed" via this
  //       allocator (and thus cached), it will continue to stay
  //       in allocation_map_. Furthermore it will also exist in
  //       available_map_. Thus an allocated memory pointer can be in both
  //       allocation_map_ and available_map_ simultaneously.
  // 2. Memory pointer maybe removed from allocation_map_, when it
  //    is freed outside of the scope of this allocator, but was allocated
  //    by this allocator.
  // 3. Available map only contains that memory which was allocated
  //    by this allocator and subsequently freed by this allocator.
  // As a result of above invariants, allocated memory ptr cannot be in
  // available_map_ unless it is in allocation_map_ as well.
  //
  // Free list: allocation size -> cached pointers of that exact size
  // that are available for reuse. Per-instance (not static).
  ska::flat_hash_map<size_t, c10::SmallVector<void*, 16>> available_map_;
  // Global (shared across all allocator instances) record of every
  // live pointer this allocator family handed out, keyed by pointer.
  static ska::flat_hash_map<void*, size_t> allocation_map_;
  // Since allocation_map, which is a global instance, is mutated/read via
  // all public APIs we need a global mutex.
  static std::mutex mutex_;

 public:
  // Notes that `ptr` was released outside of this allocator (see
  // invariant 2): removes it from the global allocation_map_.
  static void record_free(void* ptr);
  virtual ~CPUCachingAllocator();
  // Checks the cache to see if allocation of size bytes can be found.
  // If so return cached memory, else
  // allocates memory, records it for caching and returns.
  virtual void* allocate(const size_t bytes);
  // Checks if the memory being freed was marked for allocation by
  // an earlier call to allocate. If so cache the allocation.
  // Otherwise free.
  virtual void free(void* ptr);
};
93 | |
94 | CPUCachingAllocator* GetDefaultCPUCachingAllocator(); |
95 | |
96 | bool ThreadLocalCachingAllocatorEnabled(); |
97 | CPUCachingAllocator* GetThreadLocalCachingAllocator(); |
98 | |
// RAII guard that installs `allocator` as the active caching allocator for
// the enclosing scope and restores the previously active allocator on
// destruction (see the usage example at the top of this file).
// NOTE(review): single-argument constructor is not `explicit`; consider
// marking it explicit to prevent accidental implicit conversions.
class C10_API WithCPUCachingAllocatorGuard {
 public:
  WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator);
  ~WithCPUCachingAllocatorGuard();

 private:
  // Allocator that was active before this guard was constructed;
  // restored by the destructor.
  CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr};
};
107 | |
108 | } // namespace c10 |
109 | |