/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/graph/graph_node_util.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"

namespace tensorflow {

namespace {

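// Returns a copy of `ctx`'s NodeDef with the (potentially large) "value"
// attr stripped out, so that the OpKernel base class does not retain a
// second serialized copy of the constant's tensor data.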
NodeDef StripTensorDataFromNodeDef(OpKernelConstruction* ctx) {
  const NodeDef& original = ctx->def();
  if (std::is_base_of<protobuf::Message, NodeDef>()) {
    DCHECK_EQ(reinterpret_cast<const protobuf::Message*>(&original)
                  ->GetDescriptor()
                  ->field_count(),
              7)
        << "The NodeDef format has changed, and the attr-stripping code may "
           "need to be updated.";
  }
  NodeDef ret;
  ret.set_name(original.name());
  ret.set_op(original.op());
  ret.set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), &ret);
  MergeDebugInfo(original, &ret);
  if (original.has_experimental_type()) {
    *ret.mutable_experimental_type() = original.experimental_type();
  }
  return ret;
}

}  // namespace

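// The constant's value is converted from its TensorProto and placed on the
// op's device once, at kernel construction time; Compute() then simply
// re-emits the cached tensor.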
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx), false),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  profiler::ScopedMemoryDebugAnnotation op_annotation(name_view().data());
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

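// Emitting the pre-built tensor is the only per-step work; when allocation
// tracking is enabled, the tensor's bytes are reported as persistent memory.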
void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_TPU_SYSTEM), ConstantOp);

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

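// Fallback Const registrations for DEVICE_DEFAULT, which covers devices
// (e.g. pluggable devices) that do not register a device-specific Const
// kernel.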
#define REGISTER_DEFAULT_KERNEL(TYPE)                                     \
  REGISTER_KERNEL_BUILDER(                                                \
      Name("Const").Device(DEVICE_DEFAULT).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_DEFAULT_KERNEL);
TF_CALL_qint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_quint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
#undef REGISTER_DEFAULT_KERNEL

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

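// Fill broadcasts a scalar `value` into an output tensor whose shape is given
// by the host-resident `dims` input; `Index` is the integer type of `dims`
// (int32 or int64).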
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of scalars to represent shape.
        (TensorShapeUtils::IsVector(Tdims.shape()) ||
         TensorShapeUtils::IsScalar(Tdims.shape())),
        errors::InvalidArgument("dims must represent a vector, got shape ",
                                Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of length-1 vector to represent
        // scalar.
        TensorShapeUtils::IsScalar(Tvalue.shape()) ||
            (TensorShapeUtils::IsVector(Tvalue.shape()) &&
             Tvalue.shape().dim_size(0) == 1),
        errors::InvalidArgument("value must represent a scalar, got shape ",
                                Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

#define REGISTER_KERNEL(D, TYPE)                                     \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int32>("index_type")   \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int32>);           \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int64_t>("index_type") \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
REGISTER_KERNEL(CPU, qint8);
REGISTER_KERNEL(CPU, qint16);
REGISTER_KERNEL(CPU, qint32);
#undef REGISTER_CPU_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU

// A special DEVICE_DEFAULT kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

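// ZerosLike produces a tensor of zeros with the same shape and dtype as its
// input. Variant inputs are handled by dispatching to the registered
// ZEROS_LIKE_VARIANT_UNARY_OP for the wrapped value.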
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which can not be efficiently represented in GPU memory.
      int numa_node = ctx->device()->NumaNode();
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif

REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(Variant, GPU);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#undef REGISTER_KERNEL

REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);

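// OnesLike fills an output of the input's shape with ones, forwarding
// (reusing) the input buffer when possible.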
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNEL

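// Placeholders carry no value of their own; they must be fed at runtime, so
// Compute() always fails with an InvalidArgument error describing the
// expected dtype and shape.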
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU/Default kernel registration is used to address the
// situation that a placeholder is added in a GPU device context and soft
// placement is false. Since a placeholder should never be executed, adding
// these GPU kernels has no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
}  // namespace tensorflow