1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/core/common_runtime/constant_folding.h" |
17 | |
18 | #include <algorithm> |
19 | #include <atomic> |
20 | #include <set> |
21 | #include <unordered_map> |
22 | #include <vector> |
23 | |
24 | #include "tensorflow/core/common_runtime/device_factory.h" |
25 | #include "tensorflow/core/common_runtime/executor.h" |
26 | #include "tensorflow/core/common_runtime/function_utils.h" |
27 | #include "tensorflow/core/common_runtime/graph_runner.h" |
28 | #include "tensorflow/core/common_runtime/memory_types.h" |
29 | #include "tensorflow/core/common_runtime/rendezvous_mgr.h" |
30 | #include "tensorflow/core/framework/log_memory.h" |
31 | #include "tensorflow/core/framework/op_kernel.h" |
32 | #include "tensorflow/core/framework/types.h" |
33 | #include "tensorflow/core/framework/types.pb.h" |
34 | #include "tensorflow/core/graph/algorithm.h" |
35 | #include "tensorflow/core/graph/node_builder.h" |
36 | #include "tensorflow/core/graph/subgraph.h" |
37 | #include "tensorflow/core/lib/core/threadpool.h" |
38 | #include "tensorflow/core/lib/gtl/cleanup.h" |
39 | #include "tensorflow/core/lib/gtl/flatset.h" |
40 | #include "tensorflow/core/lib/strings/strcat.h" |
41 | #include "tensorflow/core/platform/denormal.h" |
42 | #include "tensorflow/core/platform/setround.h" |
43 | #include "tensorflow/core/public/session_options.h" |
44 | |
45 | namespace tensorflow { |
46 | |
47 | namespace { |
48 | |
// Attribute marking a node whose output buffer is managed by a
// _ScopedAllocator; such nodes are excluded from constant folding (see
// IsConstantFoldable below).
const char kScopedAllocatorAttrName[] = "_scoped_allocator";
50 | |
51 | // Test to see if the Op is one that turns into a constant when its |
52 | // inputs' shapes are known. |
53 | bool IsShapeOp(const Node* n) { |
54 | const auto& ts = n->type_string(); |
55 | return ts == "Shape" || ts == "ShapeN" || ts == "Rank" || ts == "Size" ; |
56 | } |
57 | |
58 | // Reads the partially-known shape of each of n's inputs from shape_map, and |
59 | // stores it to input_shapes. Returns false if any input does not have a shape |
60 | // in shape_map. |
61 | bool ReadPartialShapesFromShapeMap( |
62 | const Node* n, |
63 | const std::unordered_map<string, std::vector<PartialTensorShape>>* |
64 | shape_map, |
65 | std::vector<PartialTensorShape>* input_shapes) { |
66 | CHECK(shape_map != nullptr); |
67 | input_shapes->resize(n->num_inputs()); |
68 | for (const Edge* in : n->in_edges()) { |
69 | // Don't need to check if incoming control edges have known shapes. |
70 | if (in->IsControlEdge()) continue; |
71 | const auto known_shape_iter = shape_map->find(in->src()->name()); |
72 | if (known_shape_iter == shape_map->end()) { |
73 | // One of n's inputs doesn't have known shapes, so don't replace n. |
74 | return false; |
75 | } |
76 | const auto& known_shape = known_shape_iter->second; |
77 | CHECK_GT(known_shape.size(), in->src_output()) << known_shape_iter->first; |
78 | DCHECK_GE(in->dst_input(), 0); |
79 | DCHECK_LT(in->dst_input(), input_shapes->size()); |
80 | (*input_shapes)[in->dst_input()] = known_shape[in->src_output()]; |
81 | } |
82 | return true; |
83 | } |
84 | |
85 | // If all of n's inputs have fully-defined shapes, inserts those shapes as a |
86 | // vector of Tensors in the shape_replacement_map. |
87 | bool MaybeReplaceShapeOrShapeNOp( |
88 | const Node* n, const std::vector<PartialTensorShape>& input_shapes, |
89 | std::unordered_map<const Node*, std::vector<Tensor>>* |
90 | shape_replacement_map) { |
91 | std::vector<Tensor> defined_shape; |
92 | for (const auto& shape : input_shapes) { |
93 | if (!shape.IsFullyDefined()) { |
94 | return false; |
95 | } |
96 | const int rank = shape.dims(); |
97 | DataType op_type = n->output_type(0); |
98 | Tensor t(op_type, TensorShape({rank})); |
99 | if (op_type == DT_INT64) { |
100 | auto vec = t.vec<int64_t>(); |
101 | for (int i = 0; i < rank; ++i) { |
102 | vec(i) = shape.dim_size(i); |
103 | } |
104 | } else { |
105 | CHECK(op_type == DT_INT32); |
106 | auto vec = t.vec<int32>(); |
107 | for (int i = 0; i < rank; ++i) { |
108 | if (shape.dim_size(i) > INT_MAX) { |
109 | VLOG(1) << "Node " << n->name() << " has input shape dimension " << i |
110 | << " of " << shape.dim_size(i) << " but type INT32 " |
111 | << " so not replacing as constant: this will trigger a " |
112 | "runtime error later." ; |
113 | return false; |
114 | } |
115 | vec(i) = static_cast<int32>(shape.dim_size(i)); |
116 | } |
117 | } |
118 | defined_shape.push_back(t); |
119 | } |
120 | // All the inputs had known shapes so we can replace the node by constants |
121 | // later in the rewrite. |
122 | shape_replacement_map->insert({n, defined_shape}); |
123 | return true; |
124 | } |
125 | |
126 | // If n's input has defined rank, inserts that rank as a Tensor in the |
127 | // shape_replacement_map. |
128 | bool MaybeReplaceRankOp(const Node* n, |
129 | const std::vector<PartialTensorShape>& input_shapes, |
130 | std::unordered_map<const Node*, std::vector<Tensor>>* |
131 | shape_replacement_map) { |
132 | CHECK_EQ(input_shapes.size(), 1); |
133 | if (input_shapes[0].unknown_rank()) { |
134 | return false; |
135 | } |
136 | Tensor t(DT_INT32, TensorShape({})); |
137 | t.scalar<int32>()() = input_shapes[0].dims(); |
138 | shape_replacement_map->insert({n, {t}}); |
139 | return true; |
140 | } |
141 | |
142 | // If n's input has defined size, inserts that size as a Tensor in the |
143 | // shape_replacement_map. |
144 | bool MaybeReplaceSizeOp(const Node* n, |
145 | const std::vector<PartialTensorShape>& input_shapes, |
146 | std::unordered_map<const Node*, std::vector<Tensor>>* |
147 | shape_replacement_map) { |
148 | CHECK_EQ(input_shapes.size(), 1); |
149 | if (!input_shapes[0].IsFullyDefined()) { |
150 | return false; |
151 | } |
152 | DataType op_type = n->output_type(0); |
153 | Tensor t(op_type, TensorShape({})); |
154 | int64_t size = input_shapes[0].num_elements(); |
155 | if (op_type == DT_INT64) { |
156 | t.scalar<int64_t>()() = size; |
157 | } else { |
158 | CHECK(op_type == DT_INT32); |
159 | if (size > INT_MAX) { |
160 | VLOG(1) << "Node " << n->name() << " has input shape size " << size |
161 | << " but type INT32 " |
162 | << " so not replacing as constant: this will trigger a runtime " |
163 | "error later." ; |
164 | return false; |
165 | } |
166 | t.scalar<int32>()() = static_cast<int32>(size); |
167 | } |
168 | shape_replacement_map->insert({n, {t}}); |
169 | return true; |
170 | } |
171 | |
172 | // If n is a shape Op (Shape, ShapeN, Rank, or Size) and its inputs have their |
173 | // shapes specified in shape_map, then adds to shape_replacement_map a mapping |
174 | // from n to a vector of Tensors, where Tensor k is the (statically known) value |
175 | // on n's kth output edge. shape_replacement_map has an entry for n iff |
176 | // MaybeReplaceShapeOp returns true, so it's valid to use |
177 | // shape_replacement_map->count(n) as a test to see if n is a shape op that can |
178 | // be replaced. |
179 | bool MaybeReplaceShapeOp( |
180 | const Node* n, |
181 | const std::unordered_map<string, std::vector<PartialTensorShape>>* |
182 | shape_map, |
183 | std::unordered_map<const Node*, std::vector<Tensor>>* |
184 | shape_replacement_map) { |
185 | if (shape_map == nullptr || !IsShapeOp(n)) { |
186 | return false; |
187 | } |
188 | // input_shapes will contain the shapes of each of n's inputs. |
189 | std::vector<PartialTensorShape> input_shapes; |
190 | if (!ReadPartialShapesFromShapeMap(n, shape_map, &input_shapes)) { |
191 | return false; |
192 | } |
193 | const auto& ts = n->type_string(); |
194 | if (ts == "Shape" || ts == "ShapeN" ) { |
195 | if (!MaybeReplaceShapeOrShapeNOp(n, input_shapes, shape_replacement_map)) { |
196 | return false; |
197 | } |
198 | } else if (ts == "Rank" ) { |
199 | if (!MaybeReplaceRankOp(n, input_shapes, shape_replacement_map)) { |
200 | return false; |
201 | } |
202 | } else { |
203 | CHECK_EQ(ts, "Size" ); |
204 | if (!MaybeReplaceSizeOp(n, input_shapes, shape_replacement_map)) { |
205 | return false; |
206 | } |
207 | } |
208 | return true; |
209 | } |
210 | |
211 | // Returns true if n can be evaluated as constant. shape_map maps from |
212 | // nodes to the partially-known shapes of their outputs. consider if |
213 | // non-null returns a bool indicating whether a given (non-Const, |
214 | // non-Shape) node is eligible to be |
215 | // constant-propagated. shape_replacement_map is filled in with a |
216 | // vector of constant output tensors for constant-foldable shape nodes |
217 | // (Shape, ShapeN, Size, or Rank). |
218 | bool IsConstantFoldable( |
219 | const Node* n, |
220 | const std::unordered_map<string, std::vector<PartialTensorShape>>* |
221 | shape_map, |
222 | const std::function<bool(const Node*)>& consider, |
223 | int64_t max_constant_size_in_bytes, |
224 | std::unordered_map<const Node*, std::vector<Tensor>>* |
225 | shape_replacement_map) { |
226 | if (n->IsConstant()) { |
227 | // Skip constant folding resources as they cannot be deep copied. |
228 | return n->output_type(0) != DT_RESOURCE; |
229 | } |
230 | if (MaybeReplaceShapeOp(n, shape_map, shape_replacement_map)) { |
231 | return true; |
232 | } |
233 | if (n->op_def().is_stateful()) { |
234 | return false; |
235 | } |
236 | if (consider && !consider(n)) { |
237 | return false; |
238 | } |
239 | if (shape_map != nullptr) { |
240 | // We can skip the node if an output is known to be oversized. |
241 | auto shape_it = shape_map->find(n->name()); |
242 | if (shape_it != shape_map->end()) { |
243 | for (int64_t i = 0; i < shape_it->second.size(); ++i) { |
244 | const auto& out_shape = shape_it->second[i]; |
245 | if (out_shape.IsFullyDefined() && |
246 | out_shape.num_elements() * DataTypeSize(n->output_type(i)) > |
247 | max_constant_size_in_bytes) { |
248 | return false; |
249 | } |
250 | } |
251 | } |
252 | } |
253 | if (n->IsControlFlow() || n->IsSend() || n->IsRecv()) { |
254 | return false; |
255 | } |
256 | // TODO(yuanbyu): For now disable these session handle operations. |
257 | if (n->IsGetSessionHandle() || n->IsGetSessionTensor() || |
258 | n->IsDeleteSessionTensor()) { |
259 | return false; |
260 | } |
261 | if (n->IsSource()) { |
262 | return false; |
263 | } |
264 | if (n->IsSink()) { |
265 | return false; |
266 | } |
267 | if (n->IsFakeParam()) { |
268 | return false; |
269 | } |
270 | // Since constant-folding runs on the CPU, do not attempt to constant-fold |
271 | // operators that have no CPU kernel. Also implies that we will not |
272 | // constant-fold functions. |
273 | // TODO(phawkins): allow constant-folding for functions; functions may |
274 | // be arbitrarily expensive to execute. |
275 | if (!KernelDefAvailable(DeviceType(DEVICE_CPU), n->def())) { |
276 | return false; |
277 | } |
278 | // Do not constant fold nodes which will be allocated by ScopedAllocator. |
279 | // This is because the constant-folding graph will not contain the |
280 | // `_ScopedAllocator` node, and that is necessary to be able to run a node |
281 | // that will use this allocator. |
282 | if (n->attrs().Find(kScopedAllocatorAttrName) != nullptr) { |
283 | VLOG(2) << "Skip node [" << n->DebugString() |
284 | << "] for constant folding due to scoped allocator" ; |
285 | return false; |
286 | } |
287 | return true; |
288 | } |
289 | |
// If n is eligible for constant-folding, adds it to nodes, and places its
// control dependencies and those transitively of its constant-foldable inputs
// into constant_control_deps. If n is a constant-foldable shape node (Shape,
// ShapeN, Rank, or Size), also puts its outputs into shape_replacement_map.
//
// Assumes the caller visits nodes in data-flow order, so every
// constant-foldable ancestor of n already has an entry in
// constant_control_deps by the time n is considered; membership in that map
// is what marks a node as "constant" here. Sets *internal_node_inserted when
// a non-Const node is accepted, so the caller can tell whether anything
// other than existing constants was found.
void ConsiderConstantFoldableNode(
    Node* n, const ConstantFoldingOptions& opts, std::vector<Node*>* nodes,
    std::unordered_map<const Node*, gtl::FlatSet<Node*>>* constant_control_deps,
    std::unordered_map<const Node*, std::vector<Tensor>>* shape_replacement_map,
    bool* internal_node_inserted) {
  if (IsConstantFoldable(n, opts.shape_map, opts.consider,
                         opts.max_constant_size_in_bytes,
                         shape_replacement_map)) {
    // A node is constant provided all of its non-control incoming Tensors come
    // from constant nodes, or it's a shape Op with statically known inputs in
    // which case it is placed in shape_replacement_map.
    //
    // We allow control dependencies from non-constant nodes to constant nodes,
    // but to preserve the graph structure we must transfer the control
    // dependency onto any constant replacement.
    bool all_parents_constant = true;
    for (const Edge* in : n->in_edges()) {
      // Allows non-constant -> constant control edges.
      if (!in->IsControlEdge() &&
          constant_control_deps->count(in->src()) == 0) {
        all_parents_constant = false;
        break;
      }
    }
    // Shape-replaced nodes are accepted even with non-constant parents: their
    // outputs are already known statically, so the parents' values are not
    // needed — only sequencing (handled below) must be preserved.
    if (all_parents_constant || shape_replacement_map->count(n) != 0) {
      gtl::FlatSet<Node*>& control_deps = (*constant_control_deps)[n];
      for (const Edge* e : n->in_edges()) {
        if (constant_control_deps->count(e->src()) == 0) {
          // This branch is taken if the incoming edge is a control dependency,
          // in which case we want to add it to the dependencies being
          // accumulated for this node, or the incoming edge is not
          // constant. The latter may happen when n is a shape node and the
          // source has known shape. In that case add a control dependency from
          // the source node, since there was previously a data dependency and
          // we want to preserve sequencing constraints.
          if (!e->src()->IsSource()) {
            control_deps.insert(e->src());
          }
        } else {
          // If the parent has been accumulating control dependencies, add all
          // of its transitive control deps.
          const gtl::FlatSet<Node*>& parent_deps =
              (*constant_control_deps)[e->src()];
          control_deps.insert(parent_deps.begin(), parent_deps.end());
        }
      }
      nodes->push_back(n);
      if (!n->IsConstant()) {
        *internal_node_inserted = true;
      }
    }
  }
}
347 | |
348 | // Returns the constant foldable nodes in `nodes` in topological order. |
349 | // Populates `constant_control_deps` with the non-constant control dependencies |
350 | // of each constant node. |
351 | void FindConstantFoldableNodes( |
352 | const Graph* graph, const ConstantFoldingOptions& opts, |
353 | std::vector<Node*>* nodes, |
354 | std::unordered_map<const Node*, gtl::FlatSet<Node*>>* constant_control_deps, |
355 | std::unordered_map<const Node*, std::vector<Tensor>>* |
356 | shape_replacement_map) { |
357 | bool internal_node_inserted = false; |
358 | // Walk the nodes in data flow order. |
359 | ReverseDFS( |
360 | *graph, nullptr, |
361 | [nodes, constant_control_deps, shape_replacement_map, |
362 | &internal_node_inserted, &opts](Node* n) { |
363 | ConsiderConstantFoldableNode(n, opts, nodes, constant_control_deps, |
364 | shape_replacement_map, |
365 | &internal_node_inserted); |
366 | }, |
367 | NodeComparatorName()); |
368 | // If we have inserted just leaf level nodes, then there is nothing to fold. |
369 | if (!internal_node_inserted) { |
370 | nodes->clear(); |
371 | constant_control_deps->clear(); |
372 | } |
373 | } |
374 | |
375 | typedef std::pair<Node*, int> NodeAndOutput; |
376 | |
377 | // Adds n to constant_graph which is being built up for subsequent evaluation of |
378 | // constant propagation. node_map is the mapping of nodes in the original graph |
379 | // to nodes in the constant graph. The value of an entry in node_map is a vector |
380 | // of nodes because a ShapeN node in the original graph is replaced by a vector |
381 | // of Constant nodes in the constant graph. |
382 | void AddNodeToConstantGraph( |
383 | Node* n, std::unordered_map<Node*, std::vector<Node*>>* node_map, |
384 | Graph* constant_graph) { |
385 | std::vector<Node*>& added = (*node_map)[n]; |
386 | added.push_back(constant_graph->CopyNode(n)); |
387 | for (const Edge* in_edge : n->in_edges()) { |
388 | // Don't copy control edges to the constant graph. |
389 | if (!in_edge->IsControlEdge()) { |
390 | Node* in = in_edge->src(); |
391 | auto it = node_map->find(in); |
392 | CHECK(it != node_map->end()) |
393 | << n->DebugString() << " <-" << in->DebugString(); |
394 | if (it->second.size() == 1) { |
395 | constant_graph->AddEdge(it->second[0], in_edge->src_output(), added[0], |
396 | in_edge->dst_input()); |
397 | } else { |
398 | // The original source node had multiple outputs and was replaced by a |
399 | // vector of constants, so the edge comes from the 0th output of the kth |
400 | // added constant, rather than the kth output of the added node as in |
401 | // the standard case above. |
402 | constant_graph->AddEdge(it->second[in_edge->src_output()], 0, added[0], |
403 | in_edge->dst_input()); |
404 | } |
405 | } |
406 | } |
407 | } |
408 | |
// Replaces constant-foldable shape node n by a vector of constants in
// constant_graph, which is being built up for subsequent evaluation of constant
// propagation. node_map is the mapping of nodes in the original graph to nodes
// in the constant graph. The value of an entry in node_map is a vector of nodes
// because a ShapeN node in the original graph is replaced by a vector of
// Constant nodes in the constant graph.
void AddShapeNodeToConstantGraph(
    Node* n,
    const std::unordered_map<const Node*, std::vector<Tensor>>&
        shape_replacement_map,
    std::unordered_map<Node*, std::vector<Node*>>* node_map,
    const ConstantFoldNameGenerator& generate_new_name, Graph* constant_graph) {
  std::vector<Node*>& added = (*node_map)[n];
  const string& node_name = n->name();
  // Emit one Const node per precomputed output tensor of n; output k of n
  // corresponds to added[k].
  for (const Tensor& t : shape_replacement_map.at(n)) {
    auto builder =
        NodeDefBuilder(generate_new_name(constant_graph, node_name), "Const")
            .Attr("dtype", t.dtype())
            .Attr("value", t);
    // Finalizing the NodeDef validates the attrs before the node is added to
    // the graph below; both steps are expected to succeed for a Const.
    NodeDef def;
    CHECK(builder.Finalize(&def).ok());
    Node* constant_node;
    CHECK(NodeBuilder(builder).Finalize(constant_graph, &constant_node).ok());
    added.push_back(constant_node);
  }
  // Don't copy incoming edges to shape nodes that are being replaced.
}
436 | |
// Given the constant foldable nodes in 'nodes', returns a new graph 'g'. 'g'
// will contain copies of the nodes in 'nodes'. In addition, if there is an edge
// going from a node 'n' in 'nodes' to another node in 'orig_graph' but not in
// 'nodes', then 'tensors_to_fetch' will contain the mapping from the
// corresponding copy of 'n' and the edge number in 'g' to 'n'.
//
// Ownership of the returned Graph* passes to the caller (ConstantFold wraps
// it in a unique_ptr). 'nodes' must be in topological order so that every
// copied node's inputs are already present in node_map.
Graph* GetConstantGraph(
    const Graph* orig_graph, const std::vector<Node*>& nodes,
    const std::unordered_map<const Node*, std::vector<Tensor>>&
        shape_replacement_map,
    std::map<NodeAndOutput, NodeAndOutput>* tensors_to_fetch,
    const ConstantFoldNameGenerator& generate_new_name) {
  Graph* constant_graph = new Graph(orig_graph->op_registry());
  // Seed the map with source/sink so edges from/to them resolve directly.
  std::unordered_map<Node*, std::vector<Node*>> node_map;
  node_map[orig_graph->source_node()] = {constant_graph->source_node()};
  node_map[orig_graph->sink_node()] = {constant_graph->sink_node()};
  for (Node* n : nodes) {
    if (shape_replacement_map.count(n) == 0) {
      AddNodeToConstantGraph(n, &node_map, constant_graph);
    } else {
      // Shape nodes with statically-known outputs become Const nodes instead
      // of copies.
      AddShapeNodeToConstantGraph(n, shape_replacement_map, &node_map,
                                  generate_new_name, constant_graph);
    }
  }

  // Any data edge leaving the copied subgraph marks a tensor whose value must
  // be fetched after evaluation and substituted back into the original graph.
  for (auto const& added_nodes : node_map) {
    for (const Edge* out_edge : added_nodes.first->out_edges()) {
      if (node_map.count(out_edge->dst()) == 0) {
        if (out_edge->IsControlEdge()) continue;
        if (added_nodes.second.size() == 1) {
          tensors_to_fetch->insert(
              {{added_nodes.second[0], out_edge->src_output()},
               {added_nodes.first, out_edge->src_output()}});
        } else {
          // The node had multiple outputs and was replaced by a
          // vector of constants, so the NodeAndOutput is the 0th
          // output of the kth added constant, rather than the kth
          // output of the added node as in the standard case above.
          tensors_to_fetch->insert(
              {{added_nodes.second[out_edge->src_output()], 0},
               {added_nodes.first, out_edge->src_output()}});
        }
      }
    }
  }

  return constant_graph;
}
484 | |
// Replaces the identified Tensor in 'graph' by a 'Const' node with
// the value supplied in 'constant'. 'partition_device', if non-null
// is the device where the graph executes. Returns true if the
// replacement was successful, false otherwise.
// 'control_deps' is the set of nodes that should be control predecessors of the
// new constant node.
bool ReplaceTensorWithConstant(
    Graph* graph, const Device* partition_device, NodeAndOutput tensor,
    const Tensor& constant, const gtl::FlatSet<Node*>& control_deps,
    int64_t max_constant_size_in_bytes,
    const ConstantFoldNameGenerator& generate_new_name) {
  // Be conservative when replacing a tensor with a constant, when not
  // running on CPU.
  // 1) Do not replace another constant.
  // 2) If the destination tensor or any other tensor from the same node is not
  // an int32 tensor, and has HOST_MEMORY constraint, do not replace it.
  // 3) If the destination tensor or any other tensor from the same node is an
  // int32 tensor, and has DEVICE_MEMORY constraint, do not replace it.
  // 4) If the size of the constant in bytes is too large (>
  // max_constant_in_bytes), do not replace it. This prevents the size of the
  // Graph from growing too large.
  // 5) If the constant op created does not have a kernel implementation
  // for the device, do not use it.
  // TODO(keveman): Consider adding a new constant op that has a kernel
  // implementation for all types, but with HostMemory constraint on it's
  // output.
  // Rule (1).
  if (tensor.first->IsConstant()) {
    return false;
  }
  DeviceType device_type = partition_device
                               ? DeviceType{partition_device->device_type()}
                               : DEVICE_CPU;
  if (partition_device && device_type != DEVICE_CPU) {
    MemoryTypeVector input_mvec;
    MemoryTypeVector output_mvec;
    if (!MemoryTypesForNode(graph->op_registry(), device_type,
                            tensor.first->def(), &input_mvec, &output_mvec)
             .ok()) {
      return false;
    }
    // Rules (2) and (3), checked over every output of the node, not just the
    // tensor being replaced.
    for (int i = 0; i < output_mvec.size(); i++) {
      MemoryType memory_type = output_mvec[i];
      bool is_int32 = tensor.first->output_type(i) == DT_INT32;
      if ((memory_type == HOST_MEMORY && !is_int32) ||
          (memory_type == DEVICE_MEMORY && is_int32)) {
        return false;
      }
    }
  }
  // Rule (4).
  if (constant.TotalBytes() > max_constant_size_in_bytes) {
    return false;
  }

  Node* n = tensor.first;
  // Collect the data edges currently fed by the tensor being replaced; they
  // are rerouted to the new Const node below.
  std::vector<const Edge*> edges_to_remove;
  for (const Edge* out_edge : n->out_edges()) {
    if (out_edge->src_output() == tensor.second) {
      edges_to_remove.push_back(out_edge);
    }
  }
  const string& node_name = n->name();
  Node* constant_node;
  auto builder = NodeDefBuilder(generate_new_name(graph, node_name), "Const")
                     .Attr("dtype", constant.dtype())
                     .Attr("value", constant);
  if (partition_device) {
    builder.Device(partition_device->name());
  }
  NodeDef def;
  if (!builder.Finalize(&def).ok()) {
    return false;
  }
  // Rule (5): a Const kernel must exist for the target device type.
  const KernelDef* kdef;
  if (!FindKernelDef(device_type, def, &kdef, nullptr).ok()) {
    return false;
  }

  VLOG(1) << "Replacing " << tensor.first->name() << " :: " << tensor.second
          << " with a constant";

  if (!NodeBuilder(builder).Finalize(graph, &constant_node).ok()) {
    return false;
  }
  // Reroute all consumers of the replaced tensor to the new constant.
  for (auto edge : edges_to_remove) {
    graph->AddEdge(constant_node, 0, edge->dst(), edge->dst_input());
    graph->RemoveEdge(edge);
  }
  // Anchor the new constant: either under the accumulated control
  // dependencies, or directly under the source node so it is not orphaned.
  if (control_deps.empty()) {
    graph->AddControlEdge(graph->source_node(), constant_node);
  } else {
    for (Node* node : control_deps) {
      graph->AddControlEdge(node, constant_node);
    }
  }
  if (partition_device) {
    constant_node->set_assigned_device_name(partition_device->name());
  }
  return true;
}
584 | |
585 | } // namespace |
586 | |
// Performs constant folding on 'graph': finds constant-foldable subgraphs,
// evaluates them on the CPU via GraphRunner, and splices the resulting
// values back into 'graph' as Const nodes. Sets *was_mutated to indicate
// whether any replacement actually happened. Returns non-OK only if the
// evaluation of the constant subgraph fails; finding nothing to fold is OK.
Status ConstantFold(const ConstantFoldingOptions& opts,
                    FunctionLibraryRuntime* function_library, Env* env,
                    const Device* partition_device, Graph* graph,
                    bool* was_mutated) {
  // TensorFlow flushes denormals to zero and rounds to nearest, so we do
  // the same here.
  port::ScopedFlushDenormal flush;
  port::ScopedSetRound round(FE_TONEAREST);

  DumpGraph("Before", graph);

  // Fall back to a default name generator (old_name + "__cf__" + counter)
  // when the caller did not supply one.
  ConstantFoldNameGenerator generate_new_name = opts.generate_new_name;
  std::atomic_int_fast64_t constant_unique_id{0};
  if (generate_new_name == nullptr) {
    generate_new_name = [&constant_unique_id](Graph* graph, string old_name) {
      return strings::StrCat(graph->NewName(old_name), "__cf__",
                             constant_unique_id.fetch_add(1));
    };
  }

  std::vector<Node*> constant_foldable_nodes;
  std::unordered_map<const Node*, gtl::FlatSet<Node*>> constant_control_deps;
  std::unordered_map<const Node*, std::vector<Tensor>> shape_replacement_map;
  FindConstantFoldableNodes(graph, opts, &constant_foldable_nodes,
                            &constant_control_deps, &shape_replacement_map);
  if (constant_foldable_nodes.empty()) {
    VLOG(1) << "No constant foldable nodes found";
    *was_mutated = false;
    // This is not an error, so return the status as OK.
    return OkStatus();
  }

  // Build a separate graph containing only the foldable nodes; tensors that
  // flow from it back into the original graph are the ones to fetch.
  std::map<NodeAndOutput, NodeAndOutput> tensors_to_fetch;
  std::unique_ptr<Graph> constant_graph(
      GetConstantGraph(graph, constant_foldable_nodes, shape_replacement_map,
                       &tensors_to_fetch, generate_new_name));
  DumpGraph("Constant graph", constant_graph.get());

  if (tensors_to_fetch.empty()) {
    VLOG(1) << "No constant nodes found that feed into the original graph.";
    *was_mutated = false;
    // This is not an error, so return the status as OK.
    return OkStatus();
  }
  VLOG(1) << "Constant foldable " << constant_graph->num_node_ids() << " : "
          << graph->num_node_ids();

  std::vector<string> tensors_to_fetch_names;
  std::vector<NodeAndOutput> tensors_to_replace;
  // Sorting the nodes based on the name gives us a stable ordering between runs
  // for the same graph.
  std::vector<std::pair<NodeAndOutput, NodeAndOutput>> tensors_to_fetch_sorted(
      tensors_to_fetch.begin(), tensors_to_fetch.end());
  std::sort(tensors_to_fetch_sorted.begin(), tensors_to_fetch_sorted.end(),
            [](const std::pair<NodeAndOutput, NodeAndOutput>& n1,
               const std::pair<NodeAndOutput, NodeAndOutput>& n2) {
              return std::tie(n1.first.first->name(), n1.first.second) <
                     std::tie(n2.first.first->name(), n2.first.second);
            });
  for (auto n : tensors_to_fetch_sorted) {
    tensors_to_fetch_names.push_back(
        strings::StrCat(n.first.first->name(), ":", n.first.second));
    tensors_to_replace.push_back(n.second);
  }

  auto graph_runner = std::unique_ptr<GraphRunner>(new GraphRunner(env));
  // Evaluate the constant foldable nodes.
  std::vector<Tensor> outputs;
  auto delete_tensors = gtl::MakeCleanup([&graph_runner, &outputs] {
    // Output tensors need to be cleared before the GraphRunner is deleted.
    outputs.clear();
    graph_runner.reset(nullptr);
  });

  Status s =
      graph_runner->Run(constant_graph.get(), function_library, {} /* inputs*/,
                        tensors_to_fetch_names, &outputs);
  if (!s.ok()) {
    VLOG(1) << "Could not fetch constants: " << s;
    *was_mutated = false;
    return s;
  }

  // Fetch the constant tensors and replace the corresponding tensors in the
  // original graph with those constants.
  int32_t num_nodes_replaced = 0;
  for (size_t c = 0; c < outputs.size(); ++c) {
    const gtl::FlatSet<Node*>& control_deps =
        constant_control_deps[tensors_to_replace[c].first];
    if (ReplaceTensorWithConstant(
            graph, partition_device, tensors_to_replace[c], outputs[c],
            control_deps, opts.max_constant_size_in_bytes, generate_new_name)) {
      ++num_nodes_replaced;
    }
  }

  DumpGraph("After", graph);

  *was_mutated = (num_nodes_replaced > 0);
  return OkStatus();
}
688 | |
689 | } // namespace tensorflow |
690 | |