/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>

#include "llvm/Support/FormatVariadic.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"  // from @llvm-project
#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project
#include "mlir/Support/LogicalResult.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
#include "tensorflow/dtensor/mlir/dtensor_send_recv.h"
#include "tensorflow/dtensor/mlir/ir/tf_dtensor.h"
#include "tensorflow/dtensor/mlir/shape_utils.h"
#include "tensorflow/dtensor/mlir/value_utils.h"

namespace tensorflow {
namespace dtensor {

namespace {
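// Pulls in the auto-generated pass base class
// (impl::DTensorInferShapesForRestoreV2OpBase) from the pass declarations.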
#define GEN_PASS_DEF_DTENSORINFERSHAPESFORRESTOREV2OP
#include "tensorflow/dtensor/mlir/dtensor_passes.h.inc"

// From the Operation that produces `value`, set the result type to `type`.
//
// Recursively set the result type to `type` going backward toward
// the tf.RestoreV2Op that produced the unknown shape associated with `value`.
mlir::LogicalResult BackwardShapeInferenceToRestoreOp(mlir::ModuleOp module,
                                                      mlir::OpBuilder* builder,
                                                      mlir::Value value,
                                                      mlir::Type type) {
  mlir::Operation* op = value.getDefiningOp();
  if (op == nullptr) return mlir::success();
  if (!llvm::isa<mlir::TF::IdentityOp, mlir::TF::DTensorRecv,
                 mlir::TF::RestoreV2Op>(op)) {
    return op->emitOpError(llvm::formatv(
        "Expected an Identity, DTensorRecv, or RestoreV2 op, but got: {0}",
        op->getName().getStringRef()));
  }

  builder->setInsertionPointAfter(op);

  // Base case: if we reached the RestoreV2Op, then we reached the root of the
  // unknown shape result. Set the type of the result corresponding to `value`
  // to `type`.
  if (auto restore_op = llvm::dyn_cast_or_null<mlir::TF::RestoreV2Op>(op)) {
    // This is usually a dangerous operation, but since we are backward
    // propagating shapes and correctly setting the shapes backwards,
    // we can modify the value itself here instead of creating a new
    // RestoreV2 op.
    //
    // Creating a new RestoreV2 op and replacing all uses will make this
    // algorithm run in O(N^2) where N = number of outputs of RestoreV2.
    //
    // Using setType(type) modifies in place and makes this algorithm run in
    // O(N).
    value.setType(type);
  } else if (auto identity_op =
                 llvm::dyn_cast_or_null<mlir::TF::IdentityOp>(op)) {
    auto new_identity_op = builder->create<mlir::TF::IdentityOp>(
        identity_op.getLoc(), type, identity_op.input());
    identity_op.output().replaceAllUsesWith(new_identity_op.output());
    identity_op.erase();

    // Recursively run shape inference on the input of the identity op.
    return BackwardShapeInferenceToRestoreOp(module, builder,
                                             new_identity_op.input(), type);
  } else if (auto recv_op = llvm::dyn_cast_or_null<mlir::TF::DTensorRecv>(op)) {
    // If we have a DTensorRecv, then there is cross mesh action and the
    // RestoreV2Op we want to fix is on the mesh of the corresponding
    // DTensorSend. Set the shape of this DTensorRecv first and go to the
    // corresponding DTensorSend.
    auto new_recv_op = builder->create<mlir::TF::DTensorRecv>(
        recv_op.getLoc(), type, builder->getStringAttr(recv_op.key()),
        mlir::TF::ShapeAttr::get(builder->getContext(),
                                 type.dyn_cast<mlir::TensorType>()),
        mlir::dtensor::LayoutAttr::get(builder->getContext(),
                                       recv_op.layout()));

    recv_op.replaceAllUsesWith(new_recv_op.output());
    recv_op.erase();

    auto send_op = GetCorrespondingDTensorSendRecvOp<mlir::TF::DTensorRecv>(
        module, new_recv_op);

    // `recv_op` was erased above, so report any lookup failure on the
    // replacement op instead.
    if (!send_op.ok())
      return new_recv_op.emitOpError(send_op.status().error_message());

    // Recursively run shape inference on the input of the send op.
    return BackwardShapeInferenceToRestoreOp(
        module, builder, send_op.value()->getOperand(0), type);
  }
  return mlir::success();
}

// For every AssignVariableOp, if the value X being assigned to the resource
// tensor has unknown shape information, then X might come from the result of
// a tf.RestoreV2 op.
//
// We can infer the unknown shape of a tf.RestoreV2 op result through the
// resource tensors of the AssignVariableOps that consume the results.
//
// Thus, we propagate the underlying resource tensor shape and dtype backwards,
// leading up to the tf.RestoreV2 op.
mlir::LogicalResult PropagateShapeInformationFromAssignVariableOp(
    mlir::ModuleOp module) {
  module.walk([&](mlir::TF::AssignVariableOp assign_op) {
    // Only propagate when `value` has an unknown shape (unknown rank).
    if (ValueRank(assign_op.value()) == -1) {
      StatusOr<llvm::ArrayRef<int64_t>> shape =
          GetShapeOfValue(assign_op.resource());
      if (!shape.ok()) {
        assign_op->emitOpError(
            "Resource tensor was expected to have shape information but was "
            "missing it during CheckpointShapeInference.");
        return mlir::WalkResult::interrupt();
      }
      // Propagate the shape backwards to all the ops that use or produce
      // the value with the missing shape.
      mlir::OpBuilder builder(assign_op);
      mlir::Type known_type = GetSubtypeOrSelf(assign_op.resource());
      if (mlir::failed(BackwardShapeInferenceToRestoreOp(
              module, &builder, assign_op.value(), known_type))) {
        assign_op->emitOpError(
            "Error doing backward shape inference from AssignVariableOp "
            "during CheckpointShapeInference.");
        return mlir::WalkResult::interrupt();
      }
    }
    return mlir::WalkResult::advance();
  });

  return mlir::success();
}

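// Pass that runs the backward shape propagation above: it walks every
// AssignVariableOp in the module and pushes the known resource shapes back to
// the tf.RestoreV2 ops that produced the assigned values.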
struct DTensorInferShapesForRestoreV2Op
    : public impl::DTensorInferShapesForRestoreV2OpBase<
          DTensorInferShapesForRestoreV2Op> {
  void runOnOperation() override {
    auto module = getOperation();
    if (failed(PropagateShapeInformationFromAssignVariableOp(module)))
      return signalPassFailure();
  }
};

}  // namespace

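// Creates the module-level pass that infers shapes for tf.RestoreV2 results.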
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
CreateDTensorInferShapesForRestoreV2Op() {
  return std::make_unique<DTensorInferShapesForRestoreV2Op>();
}

}  // namespace dtensor
}  // namespace tensorflow