1#pragma once
2
3#include <algorithm>
4#include <deque>
5#include <memory>
6#include <mutex>
7
8#include <c10/util/Exception.h>
9#include <c10/util/SmallVector.h>
10#include <c10/util/flat_hash_map.h>
11
12/*
13 * CPUCachingAllocator:
14 * DISCLAIMER:
15 * This is subject to change (beta) and only supported on mobile builds.
16 * If code snippet such as in 'Usage pattern' is used outside of mobile
17 * build you will not observe the intended behavior.
18 * See below for more information.
 * Why?
 * It has been observed that some mobile platforms, such as Pixel 3, return
 * memory aggressively to the system. This results in page faults in some
 * cases and ends up hurting performance. This caching allocator aims to
 * address that. Furthermore, it also allows users to specify their own
 * allocator by implementing the allocate/free virtual interfaces.
 * What are the cons?
 * On some platforms, use of the caching allocator led to worse performance,
 * because the caching mechanism used by this allocator left us worse off
 * compared to the corresponding platform's tuned memory allocator. In that
 * case it seemed better not to use this allocator. Note that there are some
 * ideas in the works to fix this.
30 *
 * Usage pattern:
 * Instantiate and own the caching allocator.
 *   std::unique_ptr<c10::CPUCachingAllocator> caching_allocator =
 *       std::make_unique<c10::CPUCachingAllocator>();
 * Use the caching allocator with a scoped guard at inference time.
 * The guard must be a named local variable: an unnamed temporary would be
 * destroyed at the end of its own statement, i.e. before the model runs.
 *   {
 *     c10::WithCPUCachingAllocatorGuard guard(caching_allocator.get());
 *     ... model.forward(...);
 *   }
41 */
42
43namespace c10 {
44
45class C10_API CPUCachingAllocator {
46 /*
47 * What it does:
48 * Caches all the allocations carried out by this allocator.
49 * Cache key is the size of the allocation.
50 * If requested size is found in the cache returns the cached pointer.
51 * What it does not do:
52 * No speculative allocation for any future allocations.
53 */
54 private:
55 inline void* allocate_and_cache(const size_t bytes);
56 void free_cached();
57
58 protected:
59 // Invariants.
60 // 1. If memory is ever allocated via this allocator then
61 // the pointer will exist in allocation_map_, unless the allocator
62 // returned the memory to OS via free_cached.
63 // 1.1. Therefore even when the said memory is "freed" via this
64 // allocator (and thus cached), it will continue to stay
65 // in allocation_map_. Furthermore it will also exist in
66 // available_map_. Thus an allocated memory pointer can be in both
67 // allocation_map_ and available_map_ simultaneously.
68 // 2. Memory pointer maybe removed from allocation_map_, when it
69 // is freed outside of the scope of this allocator, but was allocated
70 // by this allocator.
71 // 3. Available map only contains that memory which was allocated
72 // by this allocator and subsequently freed by this allocator.
73 // As a result of above invariants, allocated memory ptr cannot be in
74 // available_map_ unless it is in allocation_map_ as well.
75 ska::flat_hash_map<size_t, c10::SmallVector<void*, 16>> available_map_;
76 static ska::flat_hash_map<void*, size_t> allocation_map_;
77 // Since allocation_map, which is a global instance, is mutated/read via
78 // all public APIs we need a global mutex.
79 static std::mutex mutex_;
80
81 public:
82 static void record_free(void* ptr);
83 virtual ~CPUCachingAllocator();
84 // Checks the cache to see if allocation of size bytes can be found.
85 // If so return cached memory, else
86 // allocates memory, records it for caching and returns.
87 virtual void* allocate(const size_t bytes);
88 // Checks if the memory being freed is was marked for allocation by
89 // an earlier call to allocate. If so cache the allocation.
90 // Otherwise free.
91 virtual void free(void* ptr);
92};
93
94CPUCachingAllocator* GetDefaultCPUCachingAllocator();
95
96bool ThreadLocalCachingAllocatorEnabled();
97CPUCachingAllocator* GetThreadLocalCachingAllocator();
98
99class C10_API WithCPUCachingAllocatorGuard {
100 public:
101 WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator);
102 ~WithCPUCachingAllocatorGuard();
103
104 private:
105 CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr};
106};
107
108} // namespace c10
109