/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/graph/graph_node_util.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"

namespace tensorflow {

namespace {

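// Returns a NodeDef with the same name, op, device, dtype, and debug info as
// `ctx->def()`, but without the (potentially large) "value" attr. The kernel
// keeps the materialized tensor in `tensor_`, so retaining the original proto
// in the stored NodeDef would hold a second copy of the constant data.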
NodeDef StripTensorDataFromNodeDef(OpKernelConstruction* ctx) {
  const NodeDef& original = ctx->def();
  if (std::is_base_of<protobuf::Message, NodeDef>()) {
    DCHECK_EQ(reinterpret_cast<const protobuf::Message*>(&original)
                  ->GetDescriptor()
                  ->field_count(),
              7)
        << "The NodeDef format has changed, and the attr-stripping code may "
           "need to be updated.";
  }
  NodeDef ret;
  ret.set_name(original.name());
  ret.set_op(original.op());
  ret.set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), &ret);
  MergeDebugInfo(original, &ret);
  if (original.has_experimental_type()) {
    *ret.mutable_experimental_type() = original.experimental_type();
  }
  return ret;
}

}  // namespace

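// A ConstantOp parses the "value" attr into `tensor_` exactly once, at kernel
// construction time, using the device's MakeTensorFromProto. Every subsequent
// Compute call simply re-emits that tensor.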
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx), false),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  profiler::ScopedMemoryDebugAnnotation op_annotation(name_view().data());
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

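// Outputs the tensor materialized in the constructor. When allocation
// tracking is enabled, the tensor's bytes are recorded as persistent memory.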
void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_TPU_SYSTEM), ConstantOp);

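// On CUDA/ROCm builds, Const is additionally registered directly on the GPU
// for the element types below, so constants of these types can be materialized
// in device memory.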
#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#define REGISTER_DEFAULT_KERNEL(TYPE)                                     \
  REGISTER_KERNEL_BUILDER(                                                \
      Name("Const").Device(DEVICE_DEFAULT).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_DEFAULT_KERNEL);
TF_CALL_qint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_quint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
#undef REGISTER_DEFAULT_KERNEL

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

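// Fill takes a 1-D "dims" tensor describing the output shape and a scalar
// "value", and produces a tensor of that shape with every element set to the
// value. For example, dims = [2, 3] and value = 9 yield a 2x3 tensor of 9s.
// `Index` is the integer type of the "dims" input (int32 or int64).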
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of scalars to represent shape.
        (TensorShapeUtils::IsVector(Tdims.shape()) ||
         TensorShapeUtils::IsScalar(Tdims.shape())),
        errors::InvalidArgument("dims must represent a vector, got shape ",
                                Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of length-1 vector to represent
        // scalar.
        TensorShapeUtils::IsScalar(Tvalue.shape()) ||
            (TensorShapeUtils::IsVector(Tvalue.shape()) &&
             Tvalue.shape().dim_size(0) == 1),
        errors::InvalidArgument("value must represent a scalar, got shape ",
                                Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

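// Each element type gets two Fill registrations, one per "index_type" (int32
// and int64) of the "dims" input. The "dims" input is always kept in host
// memory because the kernel reads it directly to compute the output shape.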
#define REGISTER_KERNEL(D, TYPE)                                     \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int32>("index_type")   \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int32>);           \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int64_t>("index_type") \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int64_t>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
REGISTER_KERNEL(CPU, qint8);
REGISTER_KERNEL(CPU, qint16);
REGISTER_KERNEL(CPU, qint32);
#undef REGISTER_CPU_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU.

// A special DEVICE_DEFAULT kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

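// ZerosLike returns a tensor of zeros with the same shape and dtype as its
// input. Variant inputs must be scalars: the output is allocated on the host
// and produced via the registered ZEROS_LIKE variant unary op. For all other
// types the kernel reuses the input buffer when possible and zero-fills it.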
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which cannot be efficiently represented in GPU memory.
      int numa_node = ctx->device()->NumaNode();
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif

REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(Variant, GPU);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#undef REGISTER_KERNEL

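// Like the DEVICE_DEFAULT Fill registration above, int32 ZerosLike on non-CPU
// devices keeps its output in host memory and uses the CPU kernel
// implementation.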
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);

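// OnesLike returns a tensor with the same shape and dtype as its input, with
// every element set to one. The input buffer is reused when possible.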
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNEL

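// A Placeholder holds no value of its own; it must always be fed at run time.
// Compute therefore unconditionally fails, reporting the expected dtype and,
// when known, the expected shape.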
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU/Default kernel registrations cover the case where a
// placeholder is created in a GPU device context with soft placement disabled.
// Since a placeholder should never be executed, adding these kernels has no
// effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
}  // namespace tensorflow