1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/core/graph/costmodel.h" |
17 | |
18 | #include <algorithm> |
19 | #include <vector> |
20 | |
21 | #include "tensorflow/core/framework/allocation_description.pb.h" |
22 | #include "tensorflow/core/framework/cost_graph.pb.h" |
23 | #include "tensorflow/core/framework/step_stats.pb.h" |
24 | #include "tensorflow/core/framework/tensor_description.pb.h" |
25 | #include "tensorflow/core/graph/graph.h" |
26 | #include "tensorflow/core/platform/logging.h" |
27 | |
28 | namespace tensorflow { |
namespace {
// Fallback per-node execution time used before any real measurements exist.
const Microseconds kDefaultTimeEstimate(1);
// Lower bound returned by TimeEstimate() so estimates are never zero.
const Microseconds kMinTimeEstimate(1);
}  // namespace
33 | |
34 | void CostModel::SuppressInfrequent() { |
35 | // Find the median of the non-zero counts, and use half of its value |
36 | // as the cutoff for a "normal" execution mode node. |
37 | if (count_.empty()) return; |
38 | std::vector<int32> non_zero; |
39 | for (auto v : count_) { |
40 | if (v > 0) non_zero.push_back(v); |
41 | } |
42 | const size_t sz = non_zero.size(); |
43 | if (sz > 0) { |
44 | std::nth_element(non_zero.begin(), non_zero.begin() + sz / 2, |
45 | non_zero.end()); |
46 | int32_t median_value = non_zero[sz / 2]; |
47 | min_count_ = median_value / 2; |
48 | VLOG(1) << "num non_zero vals: " << non_zero.size() << " median_value " |
49 | << median_value; |
50 | } else { |
51 | min_count_ = 1; |
52 | } |
53 | } |
54 | |
// Accumulates counts, times and per-slot byte totals from a non-global
// ("local") cost model `cm` into this global model, using graph `g` to map
// nodes between the two id spaces.
void CostModel::MergeFromLocal(const Graph& g, const CostModel& cm) {
  CHECK(is_global_);
  CHECK(!cm.is_global());
  for (const Node* n : g.nodes()) {
    const int local_id = cm.Id(n);
    const int global_id = Id(n);
    // Skip nodes unknown to either model.
    if (local_id < 0 || global_id < 0) continue;
    int num_slots = cm.slot_bytes_[local_id].size();
    // Grow our vectors before indexing them below.
    Ensure(global_id, num_slots);
    count_[global_id] += cm.count_[local_id];
    time_[global_id] += cm.time_[local_id];
    if (num_slots > 0) {
      if (slot_bytes_[global_id].empty()) {
        slot_bytes_[global_id].resize(num_slots);
      } else {
        // Once recorded, a node's slot count must not change.
        CHECK_EQ(num_slots, slot_bytes_[global_id].size());
      }
      for (int s = 0; s < num_slots; ++s) {
        slot_bytes_[global_id][s] += cm.slot_bytes_[local_id][s];
      }
    }
  }
}
78 | |
79 | void CostModel::MergeFromGlobal(const CostModel& cm) { |
80 | CHECK(is_global_); |
81 | CHECK_EQ(true, cm.is_global()); |
82 | const int num_nodes = cm.count_.size(); |
83 | for (int i = num_nodes - 1; i >= 0; --i) { |
84 | count_[i] += cm.count_[i]; |
85 | time_[i] += cm.time_[i]; |
86 | int num_slots = cm.slot_bytes_[i].size(); |
87 | Ensure(i, num_slots); |
88 | if (num_slots > 0) { |
89 | if (slot_bytes_[i].empty()) { |
90 | slot_bytes_[i].resize(num_slots); |
91 | } else { |
92 | CHECK_EQ(num_slots, slot_bytes_[i].size()); |
93 | } |
94 | for (int s = 0; s < num_slots; ++s) { |
95 | slot_bytes_[i][s] += cm.slot_bytes_[i][s]; |
96 | } |
97 | } |
98 | } |
99 | } |
100 | |
// Folds measured runtime statistics from step stats `ss` into this global
// model. `map` translates node names to global cost-model ids.
void CostModel::MergeFromStats(const NodeNameToCostIdMap& map,
                               const StepStats& ss) {
  CHECK(is_global_);
  for (auto& ds : ss.dev_stats()) {
    for (auto& ns : ds.node_stats()) {
      NodeNameToCostIdMap::const_iterator iter = map.find(ns.node_name());
      // We don't keep stats for nodes not in the global graph, i.e.
      // copy/send/recv nodes, feed/fetch, etc.
      if (iter == map.end()) continue;
      int32_t global_id = iter->second;
      Ensure(global_id, ns.output_size());
      // Per-step op execution time for this node.
      int64_t elapsed_micros =
          ns.op_end_rel_micros() - ns.op_start_rel_micros();
      count_[global_id]++;
      time_[global_id] += elapsed_micros;
      for (auto& no : ns.output()) {
        int si = no.slot();
        // Stats may report a slot index beyond what Ensure() saw; grow lazily.
        if (static_cast<size_t>(si) >= slot_bytes_[global_id].size()) {
          slot_bytes_[global_id].resize(1 + si);
        }
        slot_bytes_[global_id][si] +=
            no.tensor_description().allocation_description().requested_bytes();
      }
    }
  }
}
127 | |
// Grows every per-node vector so that `id` is a valid index, and (when
// num_outputs > 0) grows the per-output vectors for that node to
// `num_outputs` entries filled with "not yet recorded" sentinels.
void CostModel::Ensure(int id, int num_outputs) {
  if (slot_bytes_.size() <= static_cast<size_t>(id)) {
    // All per-node vectors are resized together so they share one length.
    slot_bytes_.resize(id + 1);
    count_.resize(id + 1);
    time_.resize(id + 1);
    max_mem_usage_.resize(id + 1);
    max_exec_time_.resize(id + 1);
    output_port_alloc_ids_.resize(id + 1);
  }
  if (num_outputs > 0) {
    auto perslot = &slot_bytes_[id];
    auto output_port_alloc_ids = &output_port_alloc_ids_[id];
    auto max_mem_usage = &max_mem_usage_[id];

    // Per-output vectors may only grow, and must all stay the same length.
    CHECK_LE(perslot->size(), num_outputs);
    DCHECK_EQ(output_port_alloc_ids->size(), perslot->size());
    DCHECK_EQ(max_mem_usage->output_port_mem.size(), perslot->size());
    DCHECK_EQ(max_mem_usage->output_port_shape.size(), perslot->size());
    DCHECK_EQ(max_mem_usage->output_port_type.size(), perslot->size());

    // Sentinels: Bytes(-1)/alloc id -1/unknown shape/DT_INVALID all mean
    // "not yet recorded" for a given output port.
    perslot->resize(num_outputs, Bytes(-1));
    output_port_alloc_ids->resize(num_outputs, -1);
    max_mem_usage->output_port_mem.resize(num_outputs, Bytes(-1));
    max_mem_usage->output_port_shape.resize(num_outputs, unknown_shape_);
    max_mem_usage->output_port_type.resize(num_outputs, DT_INVALID);
  }
}
155 | |
156 | void CostModel::SetNumOutputs(const Node* node, int num_outputs) { |
157 | const int id = Id(node); |
158 | if (id < 0) return; |
159 | // Do not resize the number of slots before checking its existing number of |
160 | // slots. |
161 | Ensure(id, 0); |
162 | auto perslot = &slot_bytes_[id]; |
163 | if (!perslot->empty()) { |
164 | CHECK_EQ(num_outputs, perslot->size()) |
165 | << "Cannot resize slot_bytes, node=" << node->name(); |
166 | } |
167 | Ensure(id, num_outputs); |
168 | } |
169 | |
170 | void CostModel::RecordCount(const Node* node, int count) { |
171 | const int id = Id(node); |
172 | if (id < 0) return; |
173 | CHECK_LT(id, slot_bytes_.size()); |
174 | count_[id] += count; |
175 | } |
176 | |
177 | int32 CostModel::TotalCount(const Node* node) const { |
178 | const int id = Id(node); |
179 | if (id < 0) return 0; |
180 | return (static_cast<size_t>(id) < slot_bytes_.size()) ? count_[id] : 0; |
181 | } |
182 | |
183 | void CostModel::RecordSize(const Node* node, int slot, Bytes bytes) { |
184 | const int id = Id(node); |
185 | if (id < 0) return; |
186 | CHECK_LT(id, slot_bytes_.size()); |
187 | auto perslot = &slot_bytes_[id]; |
188 | CHECK_LT(slot, perslot->size()); |
189 | auto v = &(*perslot)[slot]; |
190 | if (*v >= 0) { |
191 | *v += bytes; |
192 | } else { |
193 | *v = bytes; |
194 | } |
195 | } |
196 | |
197 | Bytes CostModel::TotalBytes(const Node* node, int slot) const { |
198 | const int id = Id(node); |
199 | if (id < 0 || static_cast<size_t>(id) >= slot_bytes_.size() || |
200 | slot_bytes_[id].size() <= static_cast<size_t>(slot)) { |
201 | return Bytes(0); |
202 | } |
203 | return slot_bytes_[id][slot]; |
204 | } |
205 | |
206 | Bytes CostModel::SizeEstimate(const Node* node, int slot) const { |
207 | int32_t count = TotalCount(node); |
208 | if (count < min_count_) return Bytes(0); |
209 | return TotalBytes(node, slot) / std::max(1, TotalCount(node)); |
210 | } |
211 | |
212 | void CostModel::RecordTime(const Node* node, Microseconds time) { |
213 | const int id = Id(node); |
214 | if (id < 0) return; |
215 | DCHECK(node->IsOp()) << node->DebugString(); |
216 | Ensure(id, node->num_outputs()); |
217 | time_[id] += time; |
218 | } |
219 | |
220 | Microseconds CostModel::TotalTime(const Node* node) const { |
221 | DCHECK(node->IsOp()) << node->DebugString(); |
222 | const int id = Id(node); |
223 | if (id < 0 || static_cast<size_t>(id) >= time_.size() || |
224 | time_[id] < Microseconds(0)) { |
225 | return Microseconds(0); |
226 | } |
227 | return time_[id]; |
228 | } |
229 | |
230 | Microseconds CostModel::TimeEstimate(const Node* node) const { |
231 | int32_t count = TotalCount(node); |
232 | if (count <= min_count_) return kMinTimeEstimate; |
233 | return std::max(kMinTimeEstimate, TotalTime(node) / std::max(1, count)); |
234 | } |
235 | |
// CHECK-fails unless every op node of `graph` has a non-negative time
// estimate and a non-negative size estimate for each of its outputs.
void CostModel::CheckInitialized(const Graph& graph) const {
  for (const Node* n : graph.op_nodes()) {
    // A negative time means "never recorded" (see Ensure()'s sentinels).
    CHECK(static_cast<size_t>(n->id()) < time_.size() &&
          time_[n->id()] >= Microseconds(0))
        << ": no time estimate for " << n->DebugString();

    CHECK(static_cast<size_t>(n->id()) < slot_bytes_.size())
        << ": no size estimate for " << n->DebugString();
    const auto& perslot = slot_bytes_[n->id()];
    for (size_t i = 0; i < perslot.size(); i++) {
      // Bytes(-1) marks a slot whose size was never recorded.
      CHECK_GE(perslot[i], Bytes(0)) << ": no size estimate for output# " << i
                                     << " of " << n->DebugString();
    }
  }
}
251 | |
// Records the largest observed memory use for `output_slot` of `node`,
// keeping the tensor shape/dtype that produced the new maximum.
void CostModel::RecordMaxMemorySize(const Node* node, int output_slot,
                                    Bytes bytes,
                                    const TensorShapeProto& tensor_shape,
                                    const DataType& dtype) {
  const int id = Id(node);
  if (id < 0) return;
  // Reject out-of-range slots instead of writing out of bounds below.
  if (output_slot >= node->num_outputs()) {
    LOG(ERROR) << "Unexpected output slot for node " << node->DebugString()
               << ". Got " << output_slot << " but its num_outputs is "
               << node->num_outputs();
    return;
  }
  Ensure(id, node->num_outputs());
  auto& current_max = max_mem_usage_[id].output_port_mem[output_slot];
  // If the memory allocator doesn't track memory usage, let's infer a lower
  // bound from the tensor shape and its data type.
  if (bytes.value() < 0) {
    bytes = MinTensorMemoryUsage(tensor_shape, dtype);
  }
  // Shape and dtype are updated together with the maximum they belong to.
  if (bytes.value() > current_max.value()) {
    current_max = bytes.value();
    max_mem_usage_[id].output_port_shape[output_slot] = tensor_shape;
    max_mem_usage_[id].output_port_type[output_slot] = dtype;
  }
}
277 | |
278 | Bytes CostModel::MaxMemorySize(const Node* node, int slot) const { |
279 | const int id = Id(node); |
280 | if (id < 0 || static_cast<size_t>(id) >= max_mem_usage_.size() || |
281 | max_mem_usage_[id].output_port_mem.size() <= static_cast<size_t>(slot)) { |
282 | return Bytes(0); |
283 | } |
284 | return max_mem_usage_[id].output_port_mem[slot]; |
285 | } |
286 | |
287 | const TensorShapeProto& CostModel::MaxMemoryShape(const Node* node, |
288 | int slot) const { |
289 | const int id = Id(node); |
290 | if (id < 0 || static_cast<size_t>(id) >= max_mem_usage_.size() || |
291 | max_mem_usage_[id].output_port_shape.size() <= |
292 | static_cast<size_t>(slot)) { |
293 | return unknown_shape_; |
294 | } |
295 | return max_mem_usage_[id].output_port_shape[slot]; |
296 | } |
297 | |
298 | DataType CostModel::MaxMemoryType(const Node* node, int slot) const { |
299 | const int id = Id(node); |
300 | if (id < 0 || static_cast<size_t>(id) >= max_mem_usage_.size() || |
301 | max_mem_usage_[id].output_port_type.size() <= static_cast<size_t>(slot)) { |
302 | return DT_INVALID; |
303 | } |
304 | return max_mem_usage_[id].output_port_type[slot]; |
305 | } |
306 | |
307 | Bytes CostModel::TempMemorySize(const Node* node) const { |
308 | const int id = Id(node); |
309 | if (id < 0) { |
310 | return Bytes(0); |
311 | } |
312 | return max_mem_usage_[id].temp_memory_size; |
313 | } |
314 | |
315 | Bytes CostModel::PersistentMemorySize(const Node* node) const { |
316 | const int id = Id(node); |
317 | if (id < 0) { |
318 | return Bytes(0); |
319 | } |
320 | return max_mem_usage_[id].persistent_memory_size; |
321 | } |
322 | |
323 | void CostModel::RecordMemoryStats(const Node* node, |
324 | const MemoryStats& memory_stats) { |
325 | const int id = Id(node); |
326 | if (id < 0) return; |
327 | max_mem_usage_[id].temp_memory_size = memory_stats.temp_memory_size(); |
328 | max_mem_usage_[id].persistent_memory_size = |
329 | memory_stats.persistent_memory_size(); |
330 | for (int64_t alloc_id : memory_stats.persistent_tensor_alloc_ids()) { |
331 | if (alloc_id > 0) { |
332 | persistent_alloc_ids_.insert(alloc_id); |
333 | } |
334 | } |
335 | } |
336 | |
337 | void CostModel::RecordMaxExecutionTime(const Node* node, Microseconds time) { |
338 | const int id = Id(node); |
339 | if (id < 0) return; |
340 | Ensure(id, node->num_outputs()); |
341 | max_exec_time_[id] = std::max(max_exec_time_[id], time); |
342 | } |
343 | |
344 | Microseconds CostModel::MaxExecutionTime(const Node* node) const { |
345 | const int id = Id(node); |
346 | if (id < 0 || static_cast<size_t>(id) >= max_exec_time_.size()) { |
347 | return Microseconds(0); |
348 | } |
349 | return max_exec_time_[id]; |
350 | } |
351 | |
352 | void CostModel::RecordAllocationId(const Node* node, int output_slot, |
353 | int64_t alloc_id) { |
354 | const int id = Id(node); |
355 | if (id < 0) return; |
356 | Ensure(id, node->num_outputs()); |
357 | output_port_alloc_ids_[id][output_slot] = alloc_id; |
358 | } |
359 | |
360 | int64_t CostModel::AllocationId(const Node* node, int slot) const { |
361 | const int id = Id(node); |
362 | if (id < 0 || static_cast<size_t>(id) >= output_port_alloc_ids_.size() || |
363 | output_port_alloc_ids_[id].size() <= static_cast<size_t>(slot)) { |
364 | return -1; |
365 | } |
366 | return output_port_alloc_ids_[id][slot]; |
367 | } |
368 | |
369 | bool CostModel::IsPersistentTensor(const Node* node, int64_t alloc_id) const { |
370 | if (persistent_alloc_ids_.count(alloc_id) > 0) { |
371 | return true; |
372 | } |
373 | if (persistent_alloc_ids_by_devices_.find(node->assigned_device_name()) == |
374 | persistent_alloc_ids_by_devices_.end()) { |
375 | return false; |
376 | } |
377 | return persistent_alloc_ids_by_devices_.at(node->assigned_device_name()) |
378 | .count(alloc_id); |
379 | } |
380 | |
// Estimates how long copying `b` bytes takes, given an estimated link
// bandwidth in Gbps and a fixed network latency in milliseconds.
Microseconds CostModel::CopyTimeEstimate(Bytes b, double network_latency_millis,
                                         double estimated_gbps) {
  // TODO(jeff,sanjay): estimate cost based on bandwidth along the
  // communication path and the type of transport we are using between
  // devices.
  //
  // We assume the copy time follows a linear model:
  //    copy_time = copy_bytes / rate + min_time
  int64_t copy_bytes = b.value();
  // Gbps -> bytes/usec: gbps * 1e9 bits/s / 8 bits/byte / 1e6 usec/s.
  // NOTE(review): assumes estimated_gbps > 0; a zero or negative value makes
  // the division below produce inf/negative — confirm callers guarantee it.
  const double bytes_per_usec = estimated_gbps * 1000.0 / 8;
  const double min_micros = network_latency_millis * 1000.0;
  return Microseconds(
      static_cast<int64_t>(copy_bytes / bytes_per_usec + min_micros));
}
395 | |
396 | Microseconds CostModel::ComputationTimeEstimate(int64_t math_ops) { |
397 | // TODO(jeff,sanjay): Eventually we should pass in the type of device |
398 | // (GPU vs. CPU) and use that to affect the estimate. |
399 | |
400 | // We estimate the microseconds using that value. We divide |
401 | // by 1000 to convert the madd number into microseconds (assuming |
402 | // roughly 1000 madds per microsecond (~1 GHz for one core)). |
403 | return Microseconds(math_ops / 1000); |
404 | } |
405 | |
// Bumps the counter of how many times this cost model has been updated.
void CostModel::IncrementUpdateTimes() { update_times_++; }
407 | |
// Returns how many times IncrementUpdateTimes() has been called.
int32 CostModel::GetUpdateTimes() const { return update_times_; }
409 | |
410 | // ---------------------------------------------------------------------------- |
411 | // InitCostModel |
412 | // ---------------------------------------------------------------------------- |
413 | |
414 | namespace { |
415 | |
416 | static void AddNodesToCostModel(const Graph& g, CostModel* cost_model) { |
417 | for (Node* n : g.nodes()) { |
418 | const int num_outputs = n->num_outputs(); |
419 | cost_model->SetNumOutputs(n, num_outputs); |
420 | for (int output = 0; output < num_outputs; output++) { |
421 | // Set up an initial bogus estimate for the node's outputs |
422 | cost_model->RecordSize(n, output, Bytes(1)); |
423 | } |
424 | } |
425 | } |
426 | |
427 | static void AssignSizes(const Graph& g, CostModel* cost_model) { |
428 | for (const Edge* e : g.edges()) { |
429 | // Skip if it is a control edge. |
430 | if (e->IsControlEdge()) { |
431 | continue; |
432 | } |
433 | const Node* src = e->src(); |
434 | |
435 | // TODO(josh11b): Get an estimate from the Op |
436 | Bytes size(1); |
437 | cost_model->RecordSize(src, e->src_output(), size); |
438 | } |
439 | } |
440 | |
441 | // This generates an extremely simple initial guess for the |
442 | // computation cost of each node. For ordinary Ops, its value should quickly |
443 | // be wiped out by the real runtime measurements. For other Ops we don't |
444 | // actually generate measurements, so suppression of infrequent Ops ends up |
445 | // giving them 0 costs. So, this is not of much consequence except perhaps |
446 | // in tests. |
447 | static Microseconds TimeEstimateForNode(CostModel* cost_model, Node* n) { |
448 | CHECK(n->IsOp()); |
449 | VLOG(2) << "Node " << n->id() << ": " << n->name() |
450 | << " type_string: " << n->type_string(); |
451 | if (IsConstant(n) || IsVariable(n)) { |
452 | return Microseconds(0); |
453 | } |
454 | return kDefaultTimeEstimate; |
455 | } |
456 | |
457 | static void EstimateComputationCosts(const Graph& g, CostModel* cost_model) { |
458 | for (Node* n : g.nodes()) { |
459 | if (!n->IsOp()) continue; |
460 | cost_model->RecordTime(n, TimeEstimateForNode(cost_model, n)); |
461 | } |
462 | } |
463 | |
464 | } // namespace |
465 | |
466 | void CostModel::InitFromGraph(const Graph& g) { |
467 | const int num_node_ids = g.num_node_ids(); |
468 | slot_bytes_.reserve(num_node_ids); |
469 | count_.reserve(num_node_ids); |
470 | time_.reserve(num_node_ids); |
471 | max_mem_usage_.reserve(num_node_ids); |
472 | max_exec_time_.reserve(num_node_ids); |
473 | output_port_alloc_ids_.reserve(num_node_ids); |
474 | |
475 | AddNodesToCostModel(g, this); |
476 | AssignSizes(g, this); |
477 | EstimateComputationCosts(g, this); |
478 | CheckInitialized(g); |
479 | } |
480 | |
// Serializes this cost model's view of `graph` into `cost_graph`, appending
// one CostGraphDef::Node per graph node.
void CostModel::AddToCostGraphDef(const Graph* graph,
                                  CostGraphDef* cost_graph) const {
  std::vector<const Edge*> inputs;
  std::vector<const Edge*> control_inputs;
  // Offset ids so nodes already present in cost_graph are not collided with.
  int offset = cost_graph->node_size();
  for (const Node* n : graph->nodes()) {
    CostGraphDef::Node* cnode = cost_graph->add_node();
    cnode->set_name(n->name());
    cnode->set_device(n->assigned_device_name());
    cnode->set_id(GlobalId(n, offset));

    // Partition in-edges: data inputs indexed by destination port, control
    // inputs collected separately.
    inputs.clear();
    inputs.resize(n->num_inputs(), nullptr);
    control_inputs.clear();
    for (const Edge* e : n->in_edges()) {
      if (e->IsControlEdge()) {
        control_inputs.push_back(e);
      } else {
        inputs[e->dst_input()] = e;
      }
    }
    // Emit control inputs in deterministic (source-id) order.
    std::sort(control_inputs.begin(), control_inputs.end(),
              [this](Edge const* a, Edge const* b) {
                return Id(a->src()) < Id(b->src());
              });

    for (const Edge* e : inputs) {
      CostGraphDef::Node::InputInfo* input_info = cnode->add_input_info();
      input_info->set_preceding_node(GlobalId(e->src(), offset));
      input_info->set_preceding_port(e->src_output());
    }

    for (int i = 0; i < n->num_outputs(); i++) {
      CostGraphDef::Node::OutputInfo* output_info = cnode->add_output_info();
      int64_t alloc_id = AllocationId(n, i);
      int64_t alias_to_input = -1;
      // An output that shares an allocation id with an input aliases that
      // input's buffer.
      for (const Edge* e : inputs) {
        int64_t input_alloc_id = AllocationId(e->src(), e->src_output());
        if (input_alloc_id == alloc_id) {
          alias_to_input = e->dst_input();
          break;
        }
      }
      output_info->set_alias_input_port(alias_to_input);
      output_info->set_dtype(MaxMemoryType(n, i));
      *output_info->mutable_shape() = MaxMemoryShape(n, i);
      if (alias_to_input < 0 && IsPersistentTensor(n, alloc_id)) {
        // Non-aliased persistent tensors are reported with zero size here.
        output_info->set_size(0);
      } else {
        output_info->set_size(MaxMemorySize(n, i).value());
      }
    }

    for (const Edge* e : control_inputs) {
      cnode->add_control_input(GlobalId(e->src(), offset));
    }

    cnode->set_temporary_memory_size(TempMemorySize(n).value());
    cnode->set_persistent_memory_size(PersistentMemorySize(n).value());

    cnode->set_compute_cost(MaxExecutionTime(n).value());

    // For now we treat all send nodes as final.
    // TODO(yuanbyu): Send nodes for fetches shouldn't be treated as final.
    cnode->set_is_final(n->IsSend());
  }
}
548 | |
549 | void CostModel::WriteSummaryToLog() const { |
550 | LOG(INFO) << " min_count_=" << min_count_; |
551 | for (size_t i = 0; i < count_.size(); ++i) { |
552 | LOG(INFO) << "Node " << i << " count " << count_[i] << " total time " |
553 | << time_[i] << " avg time " |
554 | << (time_[i] / (std::max(1, count_[i]))); |
555 | } |
556 | } |
557 | |
558 | Bytes CostModel::MinTensorMemoryUsage(const TensorShapeProto& tensor_shape, |
559 | const DataType& dtype) { |
560 | if (tensor_shape.unknown_rank()) { |
561 | return Bytes(-1); |
562 | } |
563 | |
564 | size_t num_coefficients = 1; |
565 | for (const TensorShapeProto::Dim& dim : tensor_shape.dim()) { |
566 | // If the dimension is unknown, it has to be at least 1 |
567 | num_coefficients *= std::max<size_t>(dim.size(), 1); |
568 | } |
569 | return Bytes(num_coefficients * DataTypeSize(dtype)); |
570 | } |
571 | |
572 | } // namespace tensorflow |
573 | |