1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ |
17 | #define TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ |
18 | |
19 | #include <unordered_map> |
20 | #include <vector> |
21 | |
22 | #include "tensorflow/core/framework/cost_graph.pb.h" |
23 | #include "tensorflow/core/framework/step_stats.pb.h" |
24 | #include "tensorflow/core/framework/tensor_shape.pb.h" |
25 | #include "tensorflow/core/graph/graph.h" |
26 | #include "tensorflow/core/graph/types.h" |
27 | #include "tensorflow/core/lib/core/stringpiece.h" |
28 | #include "tensorflow/core/lib/gtl/array_slice.h" |
29 | #include "tensorflow/core/platform/macros.h" |
30 | #include "tensorflow/core/platform/protobuf.h" |
31 | |
32 | namespace tensorflow { |
33 | typedef std::unordered_map<StringPiece, int32, StringPieceHasher> |
34 | NodeNameToCostIdMap; |
35 | |
36 | class StepStats; |
37 | |
38 | // CostModel keeps track of the following runtime statistics for nodes |
39 | // of a single Graph: |
40 | // * The total number of times a node has executed. |
41 | // * The accumulated execution time (in microseconds) of a node. |
42 | // * The accumulated size (in bytes) of each node's output. |
43 | // |
44 | // This class is NOT thread-safe. |
45 | class CostModel { |
46 | public: |
47 | // If "global" is true, maintains costs based on Node::cost_id, otherwise |
48 | // maintains costs based on Node::id. |
49 | explicit CostModel(bool is_global) : is_global_(is_global) { |
50 | unknown_shape_.set_unknown_rank(true); |
51 | } |
52 | |
53 | // Assigns min_count_ as a function of the median count for a Node. |
54 | // This value is then used for suppressing the time/size costs of |
55 | // infrequent operations. |
56 | // NOTE(tucker): Maybe this should move to a subclass of CostModel. |
57 | void SuppressInfrequent(); |
58 | |
59 | bool is_global() const { return is_global_; } |
60 | |
61 | inline int Id(const Node* n) const { |
62 | if (is_global_) { |
63 | return n->cost_id(); |
64 | } else { |
65 | return n->id(); |
66 | } |
67 | } |
68 | |
69 | inline int GlobalId(const Node* n, int offset) const { |
70 | if (is_global_) { |
71 | return n->cost_id(); |
72 | } else { |
73 | return n->id() + offset; |
74 | } |
75 | } |
76 | |
77 | // Initializes cost model for 'g'. |
78 | void InitFromGraph(const Graph& g); |
79 | |
80 | // Merges costs from cm. |
81 | // REQUIRES: is_global_ is true for this and for "cm" |
82 | void MergeFromGlobal(const CostModel& cm); |
83 | |
84 | // Merges costs from "cm", which has been computed relative to "g". |
85 | // REQUIRES: is_global_ is true for this, and false for "cm". |
86 | void MergeFromLocal(const Graph& g, const CostModel& cm); |
87 | |
88 | void MergeFromStats(const NodeNameToCostIdMap& map, const StepStats& ss); |
89 | |
90 | // Sets the number of outputs of "node". |
91 | void SetNumOutputs(const Node* node, int num_outputs); |
92 | |
93 | // Records that "node" has executed "num_count" more times. |
94 | void RecordCount(const Node* node, int num_count); |
95 | |
96 | // Returns how many times "node" has been executed. |
97 | int32 TotalCount(const Node* node) const; |
98 | |
99 | // Records that "output_slot" of "node" has produced tensors of |
100 | // aggregated "bytes". |
101 | void RecordSize(const Node* node, int output_slot, Bytes bytes); |
102 | |
103 | // Returns total bytes of tensors produced by "node"s output slot. |
104 | Bytes TotalBytes(const Node* node, int output_slot) const; |
105 | |
106 | // Returns a prediction for the size of the tensor at the |
107 | // output_slot produced by one execution of "node". |
108 | Bytes SizeEstimate(const Node* node, int output_slot) const; |
109 | |
110 | // Records that Executions of "node" have taken "time" microseconds. |
111 | void RecordTime(const Node* node, Microseconds time); |
112 | |
113 | // Returns the total execution time for "node". |
114 | Microseconds TotalTime(const Node* node) const; |
115 | |
116 | // Returns a prediction for one execution of "node". |
117 | Microseconds TimeEstimate(const Node* node) const; |
118 | |
119 | // Check that an estimate is available for every OP node in graph. |
120 | void CheckInitialized(const Graph& graph) const; |
121 | |
122 | // Records the maximum size in bytes and optionally the corresponding shape of |
123 | // the tensor generated by "output_slot" of "node". If |
124 | void RecordMaxMemorySize(const Node* node, int output_slot, Bytes bytes, |
125 | const TensorShapeProto& tensor_shape, |
126 | const DataType& dtype); |
127 | |
128 | // Returns the maximum size in bytes of the tensor generated by "output_slot" |
129 | // of "node". |
130 | Bytes MaxMemorySize(const Node* node, int output_slot) const; |
131 | |
132 | // Returns the shape corresponding to the largest memory size of the tensor |
133 | // generated by "output_slot" of "node". |
134 | const TensorShapeProto& MaxMemoryShape(const Node* node, |
135 | int output_slot) const; |
136 | |
137 | // Returns the shape corresponding to the largest memory size of the tensor |
138 | // generated by "output_slot" of "node". |
139 | DataType MaxMemoryType(const Node* node, int output_slot) const; |
140 | |
141 | // Returns the size in bytes of temporary memory consumed by "node". |
142 | Bytes TempMemorySize(const Node* node) const; |
143 | |
144 | // Returns the size of persistent memory allocated by "node". |
145 | Bytes PersistentMemorySize(const Node* node) const; |
146 | |
147 | // Records memory stats such as temp momory and persistent memory. |
148 | void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats); |
149 | |
150 | // Records the maximum execution time (in microseconds) of "node". |
151 | void RecordMaxExecutionTime(const Node* node, Microseconds time); |
152 | |
153 | // Returns the maximum execution time (in microseconds) of "node". |
154 | Microseconds MaxExecutionTime(const Node* node) const; |
155 | |
156 | // Record the unique id of the tensor generated by "output_slot" of "node". |
157 | // Any other tensor sharing the same id will be an alias, i.e. it will share |
158 | // the same underlying memory storage area. |
159 | void RecordAllocationId(const Node* node, int output_slot, int64_t alloc_id); |
160 | |
161 | // Return the unique id of the tensor generated by "output_slot" of "node". |
162 | int64_t AllocationId(const Node* node, int output_slot) const; |
163 | |
164 | bool IsPersistentTensor(const Node* node, int64_t alloc_id) const; |
165 | |
166 | // Helper routines to encapsulate static estimation heuristics |
167 | |
168 | // Compute an estimate of the time to copy "b" bytes over the network, |
169 | // given a fixed cost of "network_latency_millis" milliseconds and |
170 | // an estimated bandwidth of "estimated_gbps" gigabits per second (note that |
171 | // this value is in gigabits, not gigabytes). |
172 | static Microseconds CopyTimeEstimate(Bytes b, double network_latency_millis, |
173 | double estimated_gbps); |
174 | static Microseconds ComputationTimeEstimate(int64_t mathops); |
175 | |
176 | // Add this CostModel into the CostGraphDef. |
177 | void AddToCostGraphDef(const Graph* graph, CostGraphDef* cost_graph) const; |
178 | |
179 | // Write the contents of the CostModel to the INFO log. |
180 | void WriteSummaryToLog() const; |
181 | |
182 | // Increment the times that the cost model is updated. |
183 | void IncrementUpdateTimes(); |
184 | |
185 | // Get the times that the cost model is updated. |
186 | int32 GetUpdateTimes() const; |
187 | |
188 | private: |
189 | static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape, |
190 | const DataType& dtype); |
191 | |
192 | const bool is_global_; |
193 | |
194 | // Resizes vectors so that they are large enough for "id" and id's outputs. |
195 | void Ensure(int id, int num_outputs); |
196 | |
197 | // Nodes and Edges whose count is < this value |
198 | // get type/byte estimates of 0. |
199 | int32 min_count_ = 0; |
200 | |
201 | // The number of times the cost model is updated. |
202 | int32 update_times_ = 0; |
203 | |
204 | // Number of times each Node has been executed. |
205 | std::vector<int32> count_; |
206 | // Cumulative execution time. |
207 | std::vector<Microseconds> time_; |
208 | // Cumulative Bytes output on each channel. |
209 | std::vector<gtl::InlinedVector<Bytes, 2>> slot_bytes_; |
210 | |
211 | // Maximum execution time |
212 | std::vector<Microseconds> max_exec_time_; |
213 | |
214 | // Maximum memory usage |
215 | struct MemUsage { |
216 | MemUsage() : temp_memory_size(0), persistent_memory_size(0) {} |
217 | |
218 | // TODO(yuefengz): temp_memory_size is not being used, remove it. |
219 | Bytes temp_memory_size; |
220 | Bytes persistent_memory_size; |
221 | |
222 | gtl::InlinedVector<Bytes, 2> output_port_mem; |
223 | gtl::InlinedVector<TensorShapeProto, 2> output_port_shape; |
224 | gtl::InlinedVector<DataType, 2> output_port_type; |
225 | }; |
226 | std::vector<MemUsage> max_mem_usage_; |
227 | |
228 | std::vector<gtl::InlinedVector<int64_t, 2>> output_port_alloc_ids_; |
229 | |
230 | std::set<int64_t> persistent_alloc_ids_; |
231 | std::map<string, std::set<int64_t>> persistent_alloc_ids_by_devices_; |
232 | |
233 | TensorShapeProto unknown_shape_; |
234 | |
235 | TF_DISALLOW_COPY_AND_ASSIGN(CostModel); |
236 | }; |
237 | |
238 | } // namespace tensorflow |
239 | |
240 | #endif // TENSORFLOW_CORE_GRAPH_COSTMODEL_H_ |
241 | |