1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_CORE_FRAMEWORK_LOG_MEMORY_H_
17#define TENSORFLOW_CORE_FRAMEWORK_LOG_MEMORY_H_
18
19#include "tensorflow/core/framework/tensor.h"
20#include "tensorflow/core/platform/protobuf.h"
21
22namespace tensorflow {
23
24// LogMemory contains methods for recording memory allocations and
25// frees, associating each allocation with a step identified by a
26// process-wide id. For now, logging is enabled whenever VLOG_IS_ON(1)
27// for the log_memory module.
28//
29// Limitations: We don't log memory allocations by Eigen on the CPU
30// since that would require major changes to plumb through to the
31// Eigen::{DefaultDevice,ThreadPoolDevice} allocate and deallocate
32// methods. We do log Eigen allocations on GPU since the plumbing was
33// already in place.
34class LogMemory {
35 public:
36 // Allocations sometimes happen outside any computation step, and
37 // SpecialStepIds lists the ids used for those steps.
38 enum SpecialStepIds {
39 // Used when performing a just-in-time constant folding optimization.
40 CONSTANT_FOLDING_STEP_ID = -1,
41 // Used when constructing an Op kernel before executing a step.
42 OP_KERNEL_CONSTRUCTION_STEP_ID = -2,
43 // Used when allocating a tensor buffer from external code, e.g.,
44 // the C API.
45 EXTERNAL_TENSOR_ALLOCATION_STEP_ID = -3,
46 // Used when allocating a buffer for network transfer.
47 NETWORK_BUFFER_STEP_ID = -4,
48 // Used when allocating a buffer to fill a Proto from the GPU.
49 PROTO_BUFFER_STEP_ID = -5,
50 // Used when allocating a Tensor where the caller has not indicated
51 // the step.
52 UNKNOWN_STEP_ID = -6,
53 };
54
55 static const std::string kLogMemoryLabel;
56
57 // Test to see if memory logging is enabled. For now, logging is
58 // enabled whenever VLOG_IS_ON(2) for the log_memory module.
59 static bool IsEnabled();
60
61 // Log the beginning of a step.
62 static void RecordStep(int64_t step_id, const std::string& handle);
63
64 // Log a tensor buffer allocation. The name indicates which kernel
65 // made the allocation. If the allocation is made through an
66 // OpKernelContext the step_id indicates which step is executing,
67 // otherwise step_id is one of the SpecialStepIds defined in
68 // op_kernel.h, e.g. Op Kernel construction or an optimization pass
69 // such as constant folding.
70 static void RecordTensorAllocation(const std::string& kernel_name,
71 int64_t step_id, const Tensor& tensor);
72
73 // Log a tensor buffer deallocation. The deallocation is triggered
74 // when the buffer's refcount falls to zero, and the tracking
75 // mechanism does not associate it with a particular step or
76 // kernel. The allocation_id/allocator_name should match a
77 // corresponding tensor previously passed in to
78 // RecordTensorAllocation.
79 static void RecordTensorDeallocation(int64_t allocation_id,
80 const std::string& allocator_name);
81
82 // Log the use of a tensor as an output from a kernel.
83 static void RecordTensorOutput(const std::string& kernel_name,
84 int64_t step_id, int index,
85 const Tensor& tensor);
86
87 // Log a "raw" allocation, which is just a buffer sized in
88 // bytes. The Eigen allocator, and memory copies, record their
89 // allocations this way, since they do not allocate TensorFlow
90 // tensors. The operation is set to the OpKernel name if this is
91 // called from within an Op execution, otherwise it indicates an
92 // operation such as memcpy. The step_id if >=0 indicates which step
93 // is executing, otherwise step_id is one of the SpecialStepIds
94 // defined in op_kernel.h, e.g. Op Kernel construction or an
95 // optimization pass such as constant folding.
96 static void RecordRawAllocation(const std::string& operation, int64_t step_id,
97 size_t num_bytes, void* ptr,
98 Allocator* allocator);
99
100 // Log a "raw" deallocation of a buffer. When deferred is true, the
101 // buffer won't be used again, but a GPU kernel may still be
102 // enqueued using the buffer. A deferred deallocation should always
103 // be followed by a matching non-deferred deallocation when the
104 // buffer is actually returned and can be reused.
105 static void RecordRawDeallocation(const std::string& operation,
106 int64_t step_id, void* ptr,
107 Allocator* allocator, bool deferred);
108};
109
110} // namespace tensorflow
111
112#endif // TENSORFLOW_CORE_FRAMEWORK_LOG_MEMORY_H_
113