1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/array_ops.cc. |
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
20 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
21 | #define EIGEN_USE_GPU |
22 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
23 | |
24 | #include "tensorflow/core/kernels/slice_op.h" |
25 | |
26 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
27 | #include "tensorflow/core/framework/op_kernel.h" |
28 | #include "tensorflow/core/framework/register_types.h" |
29 | #include "tensorflow/core/framework/tensor.h" |
30 | #include "tensorflow/core/kernels/ops_util.h" |
31 | #include "tensorflow/core/lib/core/status.h" |
32 | #include "tensorflow/core/lib/gtl/array_slice.h" |
33 | #include "tensorflow/core/platform/prefetch.h" |
34 | |
35 | namespace tensorflow { |
36 | |
37 | namespace { |
38 | |
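// Copies an int32 or int64 tensor of begin/size values into an int64_t
// vector, widening int32 elements as needed.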
39 | void IntTensorToInt64Vec(const Tensor& tensor, |
40 | gtl::InlinedVector<int64_t, 4>* out) { |
41 | out->resize(tensor.NumElements()); |
42 | int64_t* out_ptr = out->data(); |
43 | if (tensor.dtype() == DT_INT32) { |
44 | const int32* tensor_ptr = tensor.flat<int32>().data(); |
45 | for (int64_t i = 0; i < tensor.NumElements(); ++i) { |
46 | out_ptr[i] = tensor_ptr[i]; |
47 | } |
48 | } else if (tensor.dtype() == DT_INT64) { |
49 | const int64_t* tensor_ptr = tensor.flat<int64_t>().data(); |
50 | for (int64_t i = 0; i < tensor.NumElements(); ++i) { |
51 | out_ptr[i] = tensor_ptr[i]; |
52 | } |
53 | } else { |
    LOG(FATAL) << "begin must be either int32 or int64";
55 | } |
56 | } |
57 | |
58 | typedef Eigen::ThreadPoolDevice CPUDevice; |
59 | typedef Eigen::GpuDevice GPUDevice; |
60 | |
61 | // Shared code that is not dependent on the type of T. We do this to reduce |
62 | // code size by not duplicating all this for all T (float, double, int32, etc.) |
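// Validates the begin and size inputs against the input shape, resolves any
// size[i] == -1 to "everything from begin[i] to the end of dimension i", and
// computes the output shape. For example, slicing a [4, 3] input with
// begin = {1, 0} and size = {2, -1} produces a [2, 3] output.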
63 | void SharedSliceValidation(OpKernelContext* context, const Tensor& input, |
64 | TensorShape* output_shape, bool* is_identity, |
65 | bool* slice_dim0, |
66 | gtl::InlinedVector<int64_t, 4>* begin, |
67 | gtl::InlinedVector<int64_t, 4>* size) { |
68 | const Tensor& begin_tensor = context->input(1); |
69 | const Tensor& size_tensor = context->input(2); |
70 | |
71 | OP_REQUIRES( |
72 | context, |
73 | TensorShapeUtils::IsVector(begin_tensor.shape()) && |
74 | TensorShapeUtils::IsVector(size_tensor.shape()) && |
75 | begin_tensor.NumElements() == input.dims() && |
76 | size_tensor.NumElements() == input.dims(), |
77 | errors::InvalidArgument( |
78 | "Expected begin and size arguments to be 1-D tensors of size " , |
79 | input.dims(), ", but got shapes " , begin_tensor.shape().DebugString(), |
80 | " and " , size_tensor.shape().DebugString(), " instead." )); |
81 | |
82 | const int input_dims = input.dims(); |
83 | IntTensorToInt64Vec(begin_tensor, begin); |
84 | IntTensorToInt64Vec(size_tensor, size); |
85 | for (int i = 0; i < input_dims; ++i) { |
86 | if ((*size)[i] == -1) { |
87 | // A size[i] of -1 means "all elements from begin[i] to dim_size(i)". |
88 | (*size)[i] = input.dim_size(i) - (*begin)[i]; |
89 | } |
90 | } |
91 | |
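  // Determine whether the slice spans the entire input (is_identity) and
  // whether it restricts only the first dimension (slice_dim0); both enable
  // copy-free fast paths in SharedSliceCommonCases.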
92 | *is_identity = true; |
93 | *slice_dim0 = true; |
94 | for (int i = 0; i < input_dims; ++i) { |
95 | int64_t b = (*begin)[i]; |
96 | int64_t s = (*size)[i]; |
97 | if (input.dim_size(i) == 0) { |
98 | OP_REQUIRES( |
99 | context, b == 0 && s == 0, |
100 | errors::InvalidArgument("Expected begin[" , i, "] == 0 (got " , b, |
101 | ") and size[" , i, "] == 0 " , "(got " , s, |
102 | ") when " , "input.dim_size(" , i, ") == 0" )); |
103 | } else { |
104 | OP_REQUIRES(context, 0 <= b && b <= input.dim_size(i), |
105 | errors::InvalidArgument("Expected begin[" , i, "] in [0, " , |
106 | input.dim_size(i), "], but got " , b)); |
107 | OP_REQUIRES( |
108 | context, 0 <= s && b + s <= input.dim_size(i), |
109 | errors::InvalidArgument("Expected size[" , i, "] in [0, " , |
110 | input.dim_size(i) - b, "], but " , "got " , s)); |
111 | } |
112 | output_shape->AddDim(s); |
113 | const bool take_all = (b == 0) && (s == input.dim_size(i)); |
114 | (*is_identity) &= take_all; |
115 | (*slice_dim0) &= (i == 0) || take_all; |
116 | } |
117 | } |
118 | |
// Code extracted from SliceOp::Compute so that MklSliceOp can reuse this
// generic logic.
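// Handles the common fast paths: an identity slice forwards the input tensor
// unchanged, and an aligned slice along dimension 0 aliases the input buffer
// via Tensor::Slice(). Otherwise the output is allocated and *done stays
// false so the caller performs the element-wise slice.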
121 | template <typename T> |
122 | static void SharedSliceCommonCases(OpKernelContext* context, |
123 | const Tensor& input, |
                                   gtl::InlinedVector<int64_t, 4>* begin,
                                   gtl::InlinedVector<int64_t, 4>* size,
126 | Tensor** result, bool* done) { |
127 | bool is_identity = true; |
128 | bool slice_dim0 = true; |
129 | TensorShape output_shape; |
130 | *done = false; |
131 | |
132 | SharedSliceValidation(context, input, &output_shape, &is_identity, |
133 | &slice_dim0, begin, size); |
134 | if (!context->status().ok()) return; |
135 | if (is_identity) { |
136 | VLOG(1) << "Slice identity" ; |
137 | context->set_output(0, input); |
138 | *done = true; |
139 | return; |
140 | } |
141 | |
142 | if (slice_dim0 && |
143 | IsDim0SliceAligned<T>(input.shape(), (*begin)[0], (*size)[0])) { |
144 | VLOG(1) << "Slice dim 0: " << input.shape().DebugString(); |
145 | CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true. |
146 | context->set_output(0, input.Slice((*begin)[0], (*begin)[0] + (*size)[0])); |
147 | *done = true; |
148 | return; |
149 | } |
150 | |
151 | OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, result)); |
152 | } |
153 | |
154 | template <typename Device, typename T> |
155 | class SliceOp : public OpKernel { |
156 | public: |
157 | explicit SliceOp(OpKernelConstruction* context) : OpKernel(context) {} |
158 | |
159 | void Compute(OpKernelContext* context) override { |
160 | gtl::InlinedVector<int64_t, 4> begin; |
161 | gtl::InlinedVector<int64_t, 4> size; |
162 | const Tensor& input = context->input(0); |
163 | Tensor* result = nullptr; |
164 | bool done = false; |
165 | SharedSliceCommonCases<T>(context, input, &begin, &size, &result, &done); |
    if (!context->status().ok() || done) return;
167 | |
168 | const int input_dims = input.dims(); |
169 | |
170 | if (result->NumElements() > 0) { |
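      // Fast path for 2-D CPU inputs of memcpy-able types: copy each output
      // row with a single memcpy. For example, begin = {2, 1} and
      // size = {3, 4} issue three memcpys of four contiguous elements each.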
171 | if (std::is_same<Device, CPUDevice>::value && input_dims == 2 && |
172 | DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) { |
173 | auto input_t = input.tensor<T, 2>(); |
174 | auto output_t = result->tensor<T, 2>(); |
175 | |
176 | const int64_t row_begin = begin[0]; |
177 | const int64_t col_begin = begin[1]; |
178 | const int64_t row_size = size[0]; |
179 | const int64_t col_size = size[1]; |
180 | |
181 | // TODO(agarwal): Consider multi-threading this loop for cases where |
182 | // row_size is very large. |
183 | for (int i = 0; i < row_size; ++i) { |
184 | const int64_t row = row_begin + i; |
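          // Prefetch the next output and input rows to hide memory latency.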
          if (i + 1 < row_size) {
186 | port::prefetch<port::PREFETCH_HINT_T0>(&output_t(i + 1, 0)); |
187 | port::prefetch<port::PREFETCH_HINT_T0>( |
188 | &input_t(row + 1, col_begin)); |
189 | } |
190 | memcpy(&output_t(i, 0), &input_t(row, col_begin), |
191 | col_size * sizeof(T)); |
192 | } |
193 | return; |
194 | } |
195 | #define HANDLE_DIM(NDIM) \ |
196 | if (input_dims == NDIM) { \ |
197 | HandleCase<NDIM>(context, begin, size, input, result); \ |
198 | return; \ |
199 | } |
200 | |
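      // Dispatch to the Eigen-based functor specialized on the input rank.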
201 | HANDLE_DIM(1); |
202 | HANDLE_DIM(2); |
203 | HANDLE_DIM(3); |
204 | HANDLE_DIM(4); |
205 | HANDLE_DIM(5); |
206 | HANDLE_DIM(6); |
207 | HANDLE_DIM(7); |
208 | HANDLE_DIM(8); |
209 | |
210 | #undef HANDLE_DIM |
211 | |
212 | OP_REQUIRES( |
213 | context, false, |
214 | errors::Unimplemented("SliceOp : Unhandled input dimensions" )); |
215 | } |
216 | } |
217 | |
218 | private: |
219 | template <int NDIM> |
220 | void HandleCase(OpKernelContext* context, gtl::ArraySlice<int64_t> begin, |
221 | gtl::ArraySlice<int64_t> size, const Tensor& input, |
222 | Tensor* result) { |
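    // Copy begin/size into Eigen index types and evaluate the slice with the
    // device-specific functor.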
223 | Eigen::DSizes<Eigen::DenseIndex, NDIM> indices; |
224 | Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes; |
225 | for (int i = 0; i < NDIM; ++i) { |
226 | indices[i] = begin[i]; |
227 | sizes[i] = size[i]; |
228 | } |
229 | |
230 | functor::Slice<Device, T, NDIM>()(context->eigen_device<Device>(), |
231 | result->tensor<T, NDIM>(), |
232 | input.tensor<T, NDIM>(), indices, sizes); |
233 | } |
234 | }; |
235 | |
236 | } // namespace |
237 | |
// Forward declarations of the functor specializations defined in the
// sharded source files.
240 | namespace functor { |
241 | #define DECLARE_CPU_SPEC(T, NDIM) \ |
242 | template <> \ |
243 | void Slice<CPUDevice, T, NDIM>::operator()( \ |
244 | const CPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \ |
245 | typename TTypes<T, NDIM>::ConstTensor input, \ |
246 | const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices, \ |
247 | const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes); \ |
248 | extern template struct Slice<CPUDevice, T, NDIM>; |
249 | |
250 | #define DECLARE_FOR_N(T) \ |
251 | DECLARE_CPU_SPEC(T, 1); \ |
252 | DECLARE_CPU_SPEC(T, 2); \ |
253 | DECLARE_CPU_SPEC(T, 3); \ |
254 | DECLARE_CPU_SPEC(T, 4); \ |
255 | DECLARE_CPU_SPEC(T, 5); \ |
256 | DECLARE_CPU_SPEC(T, 6); \ |
257 | DECLARE_CPU_SPEC(T, 7); \ |
258 | DECLARE_CPU_SPEC(T, 8); |
259 | |
260 | TF_CALL_ALL_TYPES(DECLARE_FOR_N); |
261 | |
262 | #undef DECLARE_FOR_N |
263 | #undef DECLARE_CPU_SPEC |
264 | } // namespace functor |
265 | |
266 | #define REGISTER_SLICE(type) \ |
267 | REGISTER_KERNEL_BUILDER(Name("Slice") \ |
268 | .Device(DEVICE_CPU) \ |
269 | .TypeConstraint<type>("T") \ |
270 | .HostMemory("begin") \ |
271 | .HostMemory("size"), \ |
272 | SliceOp<CPUDevice, type>) |
273 | |
274 | TF_CALL_POD_STRING_TYPES(REGISTER_SLICE); |
275 | TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE); |
276 | #undef REGISTER_SLICE |
277 | |
278 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
279 | // Forward declarations of the functor specializations for GPU. |
280 | namespace functor { |
281 | #define DECLARE_GPU_SPEC(T, NDIM) \ |
282 | template <> \ |
283 | void Slice<GPUDevice, T, NDIM>::operator()( \ |
284 | const GPUDevice& d, typename TTypes<T, NDIM>::Tensor output, \ |
285 | typename TTypes<T, NDIM>::ConstTensor input, \ |
286 | const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices, \ |
287 | const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes); \ |
288 | extern template struct Slice<GPUDevice, T, NDIM>; |
289 | |
290 | #define DECLARE_FOR_N(T) \ |
291 | DECLARE_GPU_SPEC(T, 1); \ |
292 | DECLARE_GPU_SPEC(T, 2); \ |
293 | DECLARE_GPU_SPEC(T, 3); \ |
294 | DECLARE_GPU_SPEC(T, 4); \ |
295 | DECLARE_GPU_SPEC(T, 5); \ |
296 | DECLARE_GPU_SPEC(T, 6); \ |
297 | DECLARE_GPU_SPEC(T, 7); \ |
298 | DECLARE_GPU_SPEC(T, 8); |
299 | |
300 | TF_CALL_bfloat16(DECLARE_FOR_N); |
301 | TF_CALL_int8(DECLARE_FOR_N); |
302 | TF_CALL_int32(DECLARE_FOR_N); |
303 | TF_CALL_int64(DECLARE_FOR_N); |
304 | TF_CALL_GPU_ALL_TYPES(DECLARE_FOR_N); |
305 | |
306 | #undef DECLARE_FOR_N |
307 | #undef DECLARE_GPU_SPEC |
308 | } // namespace functor |
309 | |
310 | #define REGISTER_GPU(type) \ |
311 | REGISTER_KERNEL_BUILDER(Name("Slice") \ |
312 | .Device(DEVICE_GPU) \ |
313 | .TypeConstraint<type>("T") \ |
314 | .HostMemory("begin") \ |
315 | .HostMemory("size"), \ |
316 | SliceOp<GPUDevice, type>) |
317 | |
318 | TF_CALL_bfloat16(REGISTER_GPU); |
319 | TF_CALL_int8(REGISTER_GPU); |
320 | TF_CALL_int64(REGISTER_GPU); |
321 | TF_CALL_GPU_ALL_TYPES(REGISTER_GPU); |
322 | |
323 | #undef REGISTER_GPU |
324 | |
325 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
326 | |
327 | // A special DEVICE_DEFAULT kernel for int32. |
328 | // TODO(b/25387198): Also enable int32 in device memory. This kernel |
329 | // registration requires all int32 inputs and outputs to be in host memory. |
330 | REGISTER_KERNEL_BUILDER(Name("Slice" ) |
331 | .Device(DEVICE_DEFAULT) |
332 | .TypeConstraint<int32>("T" ) |
333 | .HostMemory("input" ) |
334 | .HostMemory("begin" ) |
335 | .HostMemory("size" ) |
336 | .HostMemory("output" ), |
337 | SliceOp<CPUDevice, int32>); |
338 | |
339 | } // namespace tensorflow |
340 | |