costmodel.h source code [tensorflow/tensorflow/core/graph/costmodel.h]

1	/ Copyright 2015 The TensorFlow Authors. All Rights Reserved.*
2
3	Licensed under the Apache License, Version 2.0 (the "License");
4	you may not use this file except in compliance with the License.
5	You may obtain a copy of the License at
6
7	http://www.apache.org/licenses/LICENSE-2.0
8
9	Unless required by applicable law or agreed to in writing, software
10	distributed under the License is distributed on an "AS IS" BASIS,
11	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	See the License for the specific language governing permissions and
13	limitations under the License.
14	==============================================================================/*
15
16	#ifndef TENSORFLOW_CORE_GRAPH_COSTMODEL_H_
17	#define TENSORFLOW_CORE_GRAPH_COSTMODEL_H_
18
19	#include <unordered_map>
20	#include <vector>
21
22	#include "tensorflow/core/framework/cost_graph.pb.h"
23	#include "tensorflow/core/framework/step_stats.pb.h"
24	#include "tensorflow/core/framework/tensor_shape.pb.h"
25	#include "tensorflow/core/graph/graph.h"
26	#include "tensorflow/core/graph/types.h"
27	#include "tensorflow/core/lib/core/stringpiece.h"
28	#include "tensorflow/core/lib/gtl/array_slice.h"
29	#include "tensorflow/core/platform/macros.h"
30	#include "tensorflow/core/platform/protobuf.h"
31
32	namespace tensorflow {
33	typedef std::unordered_map<StringPiece, int32, StringPieceHasher>
34	NodeNameToCostIdMap;
35
36	class StepStats;
37
38	// CostModel keeps track of the following runtime statistics for nodes
39	// of a single Graph:
40	// The total number of times a node has executed.*
41	// The accumulated execution time (in microseconds) of a node.*
42	// The accumulated size (in bytes) of each node's output.*
43	//
44	// This class is NOT thread-safe.
45	class CostModel {
46	public:
47	// If "global" is true, maintains costs based on Node::cost_id, otherwise
48	// maintains costs based on Node::id.
49	explicit CostModel(bool is_global) : is_global_(is_global) {
50	unknown_shape_.set_unknown_rank(true);
51	}
52
53	// Assigns min_count_ as a function of the median count for a Node.
54	// This value is then used for suppressing the time/size costs of
55	// infrequent operations.
56	// NOTE(tucker): Maybe this should move to a subclass of CostModel.
57	void SuppressInfrequent();
58
59	bool is_global() const { return is_global_; }
60
61	inline int Id(const Node* n) const {
62	if (is_global_) {
63	return n->cost_id();
64	} else {
65	return n->id();
66	}
67	}
68
69	inline int GlobalId(const Node* n, int offset) const {
70	if (is_global_) {
71	return n->cost_id();
72	} else {
73	return n->id() + offset;
74	}
75	}
76
77	// Initializes cost model for 'g'.
78	void InitFromGraph(const Graph& g);
79
80	// Merges costs from cm.
81	// REQUIRES: is_global_ is true for this and for "cm"
82	void MergeFromGlobal(const CostModel& cm);
83
84	// Merges costs from "cm", which has been computed relative to "g".
85	// REQUIRES: is_global_ is true for this, and false for "cm".
86	void MergeFromLocal(const Graph& g, const CostModel& cm);
87
88	void MergeFromStats(const NodeNameToCostIdMap& map, const StepStats& ss);
89
90	// Sets the number of outputs of "node".
91	void SetNumOutputs(const Node* node, int num_outputs);
92
93	// Records that "node" has executed "num_count" more times.
94	void RecordCount(const Node* node, int num_count);
95
96	// Returns how many times "node" has been executed.
97	int32 TotalCount(const Node* node) const;
98
99	// Records that "output_slot" of "node" has produced tensors of
100	// aggregated "bytes".
101	void RecordSize(const Node* node, int output_slot, Bytes bytes);
102
103	// Returns total bytes of tensors produced by "node"s output slot.
104	Bytes TotalBytes(const Node* node, int output_slot) const;
105
106	// Returns a prediction for the size of the tensor at the
107	// output_slot produced by one execution of "node".
108	Bytes SizeEstimate(const Node* node, int output_slot) const;
109
110	// Records that Executions of "node" have taken "time" microseconds.
111	void RecordTime(const Node* node, Microseconds time);
112
113	// Returns the total execution time for "node".
114	Microseconds TotalTime(const Node* node) const;
115
116	// Returns a prediction for one execution of "node".
117	Microseconds TimeEstimate(const Node* node) const;
118
119	// Check that an estimate is available for every OP node in graph.
120	void CheckInitialized(const Graph& graph) const;
121
122	// Records the maximum size in bytes and optionally the corresponding shape of
123	// the tensor generated by "output_slot" of "node". If
124	void RecordMaxMemorySize(const Node* node, int output_slot, Bytes bytes,
125	const TensorShapeProto& tensor_shape,
126	const DataType& dtype);
127
128	// Returns the maximum size in bytes of the tensor generated by "output_slot"
129	// of "node".
130	Bytes MaxMemorySize(const Node* node, int output_slot) const;
131
132	// Returns the shape corresponding to the largest memory size of the tensor
133	// generated by "output_slot" of "node".
134	const TensorShapeProto& MaxMemoryShape(const Node* node,
135	int output_slot) const;
136
137	// Returns the shape corresponding to the largest memory size of the tensor
138	// generated by "output_slot" of "node".
139	DataType MaxMemoryType(const Node* node, int output_slot) const;
140
141	// Returns the size in bytes of temporary memory consumed by "node".
142	Bytes TempMemorySize(const Node* node) const;
143
144	// Returns the size of persistent memory allocated by "node".
145	Bytes PersistentMemorySize(const Node* node) const;
146
147	// Records memory stats such as temp momory and persistent memory.
148	void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats);
149
150	// Records the maximum execution time (in microseconds) of "node".
151	void RecordMaxExecutionTime(const Node* node, Microseconds time);
152
153	// Returns the maximum execution time (in microseconds) of "node".
154	Microseconds MaxExecutionTime(const Node* node) const;
155
156	// Record the unique id of the tensor generated by "output_slot" of "node".
157	// Any other tensor sharing the same id will be an alias, i.e. it will share
158	// the same underlying memory storage area.
159	void RecordAllocationId(const Node* node, int output_slot, int64_t alloc_id);
160
161	// Return the unique id of the tensor generated by "output_slot" of "node".
162	int64_t AllocationId(const Node* node, int output_slot) const;
163
164	bool IsPersistentTensor(const Node* node, int64_t alloc_id) const;
165
166	// Helper routines to encapsulate static estimation heuristics
167
168	// Compute an estimate of the time to copy "b" bytes over the network,
169	// given a fixed cost of "network_latency_millis" milliseconds and
170	// an estimated bandwidth of "estimated_gbps" gigabits per second (note that
171	// this value is in gigabits, not gigabytes).
172	static Microseconds CopyTimeEstimate(Bytes b, double network_latency_millis,
173	double estimated_gbps);
174	static Microseconds ComputationTimeEstimate(int64_t mathops);
175
176	// Add this CostModel into the CostGraphDef.
177	void AddToCostGraphDef(const Graph* graph, CostGraphDef* cost_graph) const;
178
179	// Write the contents of the CostModel to the INFO log.
180	void WriteSummaryToLog() const;
181
182	// Increment the times that the cost model is updated.
183	void IncrementUpdateTimes();
184
185	// Get the times that the cost model is updated.
186	int32 GetUpdateTimes() const;
187
188	private:
189	static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape,
190	const DataType& dtype);
191
192	const bool is_global_;
193
194	// Resizes vectors so that they are large enough for "id" and id's outputs.
195	void Ensure(int id, int num_outputs);
196
197	// Nodes and Edges whose count is < this value
198	// get type/byte estimates of 0.
199	int32 min_count_ = `0`;
200
201	// The number of times the cost model is updated.
202	int32 update_times_ = `0`;
203
204	// Number of times each Node has been executed.
205	std::vector<int32> count_;
206	// Cumulative execution time.
207	std::vector<Microseconds> time_;
208	// Cumulative Bytes output on each channel.
209	std::vector<gtl::InlinedVector<Bytes, `2`>> slot_bytes_;
210
211	// Maximum execution time
212	std::vector<Microseconds> max_exec_time_;
213
214	// Maximum memory usage
215	struct MemUsage {
216	MemUsage() : temp_memory_size (`0`), persistent_memory_size (`0`) {}
217
218	// TODO(yuefengz): temp_memory_size is not being used, remove it.
219	Bytes temp_memory_size;
220	Bytes persistent_memory_size;
221
222	gtl::InlinedVector<Bytes, `2`> output_port_mem;
223	gtl::InlinedVector<TensorShapeProto, `2`> output_port_shape;
224	gtl::InlinedVector<DataType, `2`> output_port_type;
225	};
226	std::vector<MemUsage> max_mem_usage_;
227
228	std::vector<gtl::InlinedVector<int64_t, `2`>> output_port_alloc_ids_;
229
230	std::set<int64_t> persistent_alloc_ids_;
231	std::map<string, std::set<int64_t>> persistent_alloc_ids_by_devices_;
232
233	TensorShapeProto unknown_shape_;
234
235	TF_DISALLOW_COPY_AND_ASSIGN(CostModel);
236	};
237
238	} // namespace tensorflow
239
240	#endif // TENSORFLOW_CORE_GRAPH_COSTMODEL_H_
241

Browse the source code of tensorflow/tensorflow/core/graph/costmodel.h