1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_FRAMEWORK_LOG_MEMORY_H_ |
17 | #define TENSORFLOW_CORE_FRAMEWORK_LOG_MEMORY_H_ |
18 | |
19 | #include "tensorflow/core/framework/tensor.h" |
20 | #include "tensorflow/core/platform/protobuf.h" |
21 | |
22 | namespace tensorflow { |
23 | |
24 | // LogMemory contains methods for recording memory allocations and |
25 | // frees, associating each allocation with a step identified by a |
26 | // process-wide id. For now, logging is enabled whenever VLOG_IS_ON(2) |
27 | // for the log_memory module. |
28 | // |
29 | // Limitations: We don't log memory allocations by Eigen on the CPU |
30 | // since that would require major changes to plumb through to the |
31 | // Eigen::{DefaultDevice,ThreadPoolDevice} allocate and deallocate |
32 | // methods. We do log Eigen allocations on GPU since the plumbing was |
33 | // already in place. |
34 | class LogMemory { |
35 | public: |
36 | // Allocations sometimes happen outside any computation step, and |
37 | // SpecialStepIds lists the ids used for those steps. |
38 | enum SpecialStepIds { |
39 | // Used when performing a just-in-time constant folding optimization. |
40 | CONSTANT_FOLDING_STEP_ID = -1, |
41 | // Used when constructing an Op kernel before executing a step. |
42 | OP_KERNEL_CONSTRUCTION_STEP_ID = -2, |
43 | // Used when allocating a tensor buffer from external code, e.g., |
44 | // the C API. |
45 | EXTERNAL_TENSOR_ALLOCATION_STEP_ID = -3, |
46 | // Used when allocating a buffer for network transfer. |
47 | NETWORK_BUFFER_STEP_ID = -4, |
48 | // Used when allocating a buffer to fill a Proto from the GPU. |
49 | PROTO_BUFFER_STEP_ID = -5, |
50 | // Used when allocating a Tensor where the caller has not indicated |
51 | // the step. |
52 | UNKNOWN_STEP_ID = -6, |
53 | }; |
54 | |
55 | static const std::string kLogMemoryLabel; |
56 | |
57 | // Returns true if memory logging is enabled. For now, logging is |
58 | // enabled whenever VLOG_IS_ON(2) for the log_memory module. |
59 | static bool IsEnabled(); |
60 | |
61 | // Log the beginning of the step identified by step_id. |
62 | static void RecordStep(int64_t step_id, const std::string& handle); |
63 | |
64 | // Log a tensor buffer allocation. The name indicates which kernel |
65 | // made the allocation. If the allocation is made through an |
66 | // OpKernelContext the step_id indicates which step is executing, |
67 | // otherwise step_id is one of the SpecialStepIds defined in |
68 | // this class, e.g. Op Kernel construction or an optimization pass |
69 | // such as constant folding. |
70 | static void RecordTensorAllocation(const std::string& kernel_name, |
71 | int64_t step_id, const Tensor& tensor); |
72 | |
73 | // Log a tensor buffer deallocation. The deallocation is triggered |
74 | // when the buffer's refcount falls to zero, and the tracking |
75 | // mechanism does not associate it with a particular step or |
76 | // kernel. The allocation_id/allocator_name should match a |
77 | // corresponding tensor previously passed in to |
78 | // RecordTensorAllocation. |
79 | static void RecordTensorDeallocation(int64_t allocation_id, |
80 | const std::string& allocator_name); |
81 | |
82 | // Log the use of a tensor as an output from a kernel. |
83 | static void RecordTensorOutput(const std::string& kernel_name, |
84 | int64_t step_id, int index, |
85 | const Tensor& tensor); |
86 | |
87 | // Log a "raw" allocation, which is just a buffer sized in |
88 | // bytes. The Eigen allocator, and memory copies, record their |
89 | // allocations this way, since they do not allocate TensorFlow |
90 | // tensors. The operation is set to the OpKernel name if this is |
91 | // called from within an Op execution, otherwise it indicates an |
92 | // operation such as memcpy. The step_id if >=0 indicates which step |
93 | // is executing, otherwise step_id is one of the SpecialStepIds |
94 | // defined in this class, e.g. Op Kernel construction or an |
95 | // optimization pass such as constant folding. |
96 | static void RecordRawAllocation(const std::string& operation, int64_t step_id, |
97 | size_t num_bytes, void* ptr, |
98 | Allocator* allocator); |
99 | |
100 | // Log a "raw" deallocation of a buffer. When deferred is true, the |
101 | // buffer won't be used again, but a GPU kernel may still be |
102 | // enqueued using the buffer. A deferred deallocation should always |
103 | // be followed by a matching non-deferred deallocation when the |
104 | // buffer is actually returned and can be reused. |
105 | static void RecordRawDeallocation(const std::string& operation, |
106 | int64_t step_id, void* ptr, |
107 | Allocator* allocator, bool deferred); |
108 | }; |
109 | |
110 | } // namespace tensorflow |
111 | |
112 | #endif // TENSORFLOW_CORE_FRAMEWORK_LOG_MEMORY_H_ |
113 | |